Auto-repair invalid OpenClaw config on gateway startup
Co-authored-by: Haze <hazeone@users.noreply.github.com>
This commit is contained in:
@@ -32,6 +32,7 @@ import {
|
|||||||
type DeviceIdentity,
|
type DeviceIdentity,
|
||||||
} from '../utils/device-identity';
|
} from '../utils/device-identity';
|
||||||
import { syncGatewayTokenToConfig, syncBrowserConfigToOpenClaw } from '../utils/openclaw-auth';
|
import { syncGatewayTokenToConfig, syncBrowserConfigToOpenClaw } from '../utils/openclaw-auth';
|
||||||
|
import { shouldAttemptConfigAutoRepair } from './startup-recovery';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gateway connection status
|
* Gateway connection status
|
||||||
@@ -176,6 +177,7 @@ export class GatewayManager extends EventEmitter {
|
|||||||
private shouldReconnect = true;
|
private shouldReconnect = true;
|
||||||
private startLock = false;
|
private startLock = false;
|
||||||
private lastSpawnSummary: string | null = null;
|
private lastSpawnSummary: string | null = null;
|
||||||
|
private recentStartupStderrLines: string[] = [];
|
||||||
private pendingRequests: Map<string, {
|
private pendingRequests: Map<string, {
|
||||||
resolve: (value: unknown) => void;
|
resolve: (value: unknown) => void;
|
||||||
reject: (error: Error) => void;
|
reject: (error: Error) => void;
|
||||||
@@ -232,6 +234,16 @@ export class GatewayManager extends EventEmitter {
|
|||||||
|
|
||||||
return { level: 'warn', normalized: msg };
|
return { level: 'warn', normalized: msg };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Append one stderr line to `recentStartupStderrLines`.
 *
 * The line is trimmed first; lines that are empty after trimming are ignored.
 * The buffer is capped at the 120 most recent entries.
 *
 * @param line Raw stderr line (may carry surrounding whitespace).
 */
private recordStartupStderrLine(line: string): void {
  const MAX_STDERR_LINES = 120;

  const text = line.trim();
  if (text.length === 0) {
    return;
  }

  const buffer = this.recentStartupStderrLines;
  buffer.push(text);

  // Drop the oldest entries in place once the cap is exceeded.
  const overflow = buffer.length - MAX_STDERR_LINES;
  if (overflow > 0) {
    buffer.splice(0, overflow);
  }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get current Gateway status
|
* Get current Gateway status
|
||||||
@@ -279,49 +291,71 @@ export class GatewayManager extends EventEmitter {
|
|||||||
|
|
||||||
this.reconnectAttempts = 0;
|
this.reconnectAttempts = 0;
|
||||||
this.setStatus({ state: 'starting', reconnectAttempts: 0 });
|
this.setStatus({ state: 'starting', reconnectAttempts: 0 });
|
||||||
|
let configRepairAttempted = false;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Check if Python environment is ready (self-healing) asynchronously
|
while (true) {
|
||||||
void isPythonReady().then(pythonReady => {
|
this.recentStartupStderrLines = [];
|
||||||
if (!pythonReady) {
|
try {
|
||||||
logger.info('Python environment missing or incomplete, attempting background repair...');
|
// Check if Python environment is ready (self-healing) asynchronously
|
||||||
// We don't await this to avoid blocking Gateway startup,
|
void isPythonReady().then(pythonReady => {
|
||||||
// as uv run will handle it if needed, but this pre-warms it.
|
if (!pythonReady) {
|
||||||
void setupManagedPython().catch(err => {
|
logger.info('Python environment missing or incomplete, attempting background repair...');
|
||||||
logger.error('Background Python repair failed:', err);
|
// We don't await this to avoid blocking Gateway startup,
|
||||||
|
// as uv run will handle it if needed, but this pre-warms it.
|
||||||
|
void setupManagedPython().catch(err => {
|
||||||
|
logger.error('Background Python repair failed:', err);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}).catch(err => {
|
||||||
|
logger.error('Failed to check Python environment:', err);
|
||||||
});
|
});
|
||||||
}
|
|
||||||
}).catch(err => {
|
|
||||||
logger.error('Failed to check Python environment:', err);
|
|
||||||
});
|
|
||||||
|
|
||||||
// Check if Gateway is already running
|
// Check if Gateway is already running
|
||||||
logger.debug('Checking for existing Gateway...');
|
logger.debug('Checking for existing Gateway...');
|
||||||
const existing = await this.findExistingGateway();
|
const existing = await this.findExistingGateway();
|
||||||
if (existing) {
|
if (existing) {
|
||||||
logger.debug(`Found existing Gateway on port ${existing.port}`);
|
logger.debug(`Found existing Gateway on port ${existing.port}`);
|
||||||
await this.connect(existing.port, existing.externalToken);
|
await this.connect(existing.port, existing.externalToken);
|
||||||
this.ownsProcess = false;
|
this.ownsProcess = false;
|
||||||
this.setStatus({ pid: undefined });
|
this.setStatus({ pid: undefined });
|
||||||
this.startHealthCheck();
|
this.startHealthCheck();
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.debug('No existing Gateway found, starting new process...');
|
||||||
|
|
||||||
|
// Start new Gateway process
|
||||||
|
await this.startProcess();
|
||||||
|
|
||||||
|
// Wait for Gateway to be ready
|
||||||
|
await this.waitForReady();
|
||||||
|
|
||||||
|
// Connect WebSocket
|
||||||
|
await this.connect(this.status.port);
|
||||||
|
|
||||||
|
// Start health monitoring
|
||||||
|
this.startHealthCheck();
|
||||||
|
logger.debug('Gateway started successfully');
|
||||||
|
return;
|
||||||
|
} catch (error) {
|
||||||
|
if (shouldAttemptConfigAutoRepair(error, this.recentStartupStderrLines, configRepairAttempted)) {
|
||||||
|
configRepairAttempted = true;
|
||||||
|
logger.warn(
|
||||||
|
'Detected invalid OpenClaw config during Gateway startup; running doctor repair before retry'
|
||||||
|
);
|
||||||
|
const repaired = await this.runOpenClawDoctorRepair();
|
||||||
|
if (repaired) {
|
||||||
|
logger.info('OpenClaw doctor repair completed; retrying Gateway startup');
|
||||||
|
this.setStatus({ state: 'starting', error: undefined, reconnectAttempts: 0 });
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
logger.error('OpenClaw doctor repair failed; not retrying Gateway startup');
|
||||||
|
}
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.debug('No existing Gateway found, starting new process...');
|
|
||||||
|
|
||||||
// Start new Gateway process
|
|
||||||
await this.startProcess();
|
|
||||||
|
|
||||||
// Wait for Gateway to be ready
|
|
||||||
await this.waitForReady();
|
|
||||||
|
|
||||||
// Connect WebSocket
|
|
||||||
await this.connect(this.status.port);
|
|
||||||
|
|
||||||
// Start health monitoring
|
|
||||||
this.startHealthCheck();
|
|
||||||
logger.debug('Gateway started successfully');
|
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error(
|
logger.error(
|
||||||
`Gateway start failed (port=${this.status.port}, reconnectAttempts=${this.reconnectAttempts}, spawn=${this.lastSpawnSummary ?? 'n/a'})`,
|
`Gateway start failed (port=${this.status.port}, reconnectAttempts=${this.reconnectAttempts}, spawn=${this.lastSpawnSummary ?? 'n/a'})`,
|
||||||
@@ -709,6 +743,115 @@ export class GatewayManager extends EventEmitter {
|
|||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Run the OpenClaw entry script with `doctor --fix --yes --non-interactive`
 * in a child process to repair an invalid OpenClaw config.
 *
 * The child runs with `cwd` set to the OpenClaw directory. When the bundled
 * `bin` directory exists it is prepended to PATH. In packaged builds the
 * child additionally gets ELECTRON_RUN_AS_NODE, OPENCLAW_NO_RESPAWN and a
 * NODE_OPTIONS flag that silences experimental warnings (unless warnings are
 * already disabled).
 *
 * @returns true only when the doctor process exits with code 0. Resolves to
 *   false when the entry script is missing, the child emits an `error` event,
 *   it exits with any other status, or it is still running after 120 seconds
 *   (in which case SIGTERM is sent to it).
 */
private async runOpenClawDoctorRepair(): Promise<boolean> {
  const DOCTOR_TIMEOUT_MS = 120000;
  const SILENCE_EXPERIMENTAL_FLAG = '--disable-warning=ExperimentalWarning';

  const openclawDir = getOpenClawDir();
  const entryScript = getOpenClawEntryPath();
  if (!existsSync(entryScript)) {
    logger.error(`Cannot run OpenClaw doctor repair: entry script not found at ${entryScript}`);
    return false;
  }

  const packaged = app.isPackaged;

  // Packaged builds ship binaries under resources/bin; dev builds keep them
  // in a per-target directory below the working directory.
  const binPath = packaged
    ? path.join(process.resourcesPath, 'bin')
    : path.join(process.cwd(), 'resources', 'bin', `${process.platform}-${process.arch}`);
  const binPathExists = existsSync(binPath);
  const inheritedPath = process.env.PATH || '';
  const finalPath = binPathExists
    ? `${binPath}${path.delimiter}${inheritedPath}`
    : inheritedPath;

  const uvEnv = await getUvMirrorEnv();
  const command = packaged ? getNodeExecutablePath() : 'node';
  const args = [entryScript, 'doctor', '--fix', '--yes', '--non-interactive'];
  logger.info(
    `Running OpenClaw doctor repair (command="${command}", args="${args.join(' ')}", cwd="${openclawDir}", bundledBin=${binPathExists ? 'yes' : 'no'})`
  );

  // uvEnv is spread last so its entries take precedence over the inherited environment.
  const spawnEnv: Record<string, string | undefined> = {
    ...process.env,
    PATH: finalPath,
    ...uvEnv,
  };
  if (packaged) {
    spawnEnv['ELECTRON_RUN_AS_NODE'] = '1';
    spawnEnv['OPENCLAW_NO_RESPAWN'] = '1';
    const nodeOpts = spawnEnv['NODE_OPTIONS'] ?? '';
    const warningsAlreadySilenced =
      nodeOpts.includes(SILENCE_EXPERIMENTAL_FLAG) || nodeOpts.includes('--no-warnings');
    if (!warningsAlreadySilenced) {
      spawnEnv['NODE_OPTIONS'] = `${nodeOpts} ${SILENCE_EXPERIMENTAL_FLAG}`.trim();
    }
  }

  return new Promise<boolean>((resolve) => {
    const child = spawn(command, args, {
      cwd: openclawDir,
      stdio: ['ignore', 'pipe', 'pipe'],
      detached: false,
      shell: false,
      env: spawnEnv,
    });

    // Timeout, 'error' and 'exit' can all fire; only the first outcome counts.
    let settled = false;
    const settle = (ok: boolean): void => {
      if (!settled) {
        settled = true;
        resolve(ok);
      }
    };

    const timer = setTimeout(() => {
      logger.error(`OpenClaw doctor repair timed out after ${DOCTOR_TIMEOUT_MS}ms`);
      try {
        child.kill('SIGTERM');
      } catch {
        // ignore
      }
      settle(false);
    }, DOCTOR_TIMEOUT_MS);

    // Builds a 'data' listener that logs every non-blank, trimmed line of a chunk.
    const forwardLines = (emit: (text: string) => void) =>
      (chunk: { toString(): string }): void => {
        for (const piece of chunk.toString().split(/\r?\n/)) {
          const text = piece.trim();
          if (text) {
            emit(text);
          }
        }
      };

    child.on('error', (err) => {
      clearTimeout(timer);
      logger.error('Failed to spawn OpenClaw doctor repair process:', err);
      settle(false);
    });

    child.stdout?.on('data', forwardLines((text) => {
      logger.debug(`[Gateway doctor stdout] ${text}`);
    }));

    child.stderr?.on('data', forwardLines((text) => {
      logger.warn(`[Gateway doctor stderr] ${text}`);
    }));

    child.on('exit', (code, signal) => {
      clearTimeout(timer);
      const succeeded = code === 0;
      if (succeeded) {
        logger.info('OpenClaw doctor repair completed successfully');
      } else {
        logger.warn(`OpenClaw doctor repair exited (${this.formatExit(code, signal)})`);
      }
      settle(succeeded);
    });
  });
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Start Gateway process
|
* Start Gateway process
|
||||||
@@ -924,6 +1067,7 @@ export class GatewayManager extends EventEmitter {
|
|||||||
child.stderr?.on('data', (data) => {
|
child.stderr?.on('data', (data) => {
|
||||||
const raw = data.toString();
|
const raw = data.toString();
|
||||||
for (const line of raw.split(/\r?\n/)) {
|
for (const line of raw.split(/\r?\n/)) {
|
||||||
|
this.recordStartupStderrLine(line);
|
||||||
const classified = this.classifyStderrMessage(line);
|
const classified = this.classifyStderrMessage(line);
|
||||||
if (classified.level === 'drop') continue;
|
if (classified.level === 'drop') continue;
|
||||||
if (classified.level === 'debug') {
|
if (classified.level === 'debug') {
|
||||||
|
|||||||
60
electron/gateway/startup-recovery.ts
Normal file
60
electron/gateway/startup-recovery.ts
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
/**
|
||||||
|
* Gateway startup recovery heuristics.
|
||||||
|
*
|
||||||
|
* This module is intentionally dependency-free so it can be unit-tested
|
||||||
|
* without Electron/runtime mocks.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * Phrases that indicate a config validation failure. All patterns are
 * case-insensitive and anchored on word boundaries.
 */
const INVALID_CONFIG_PATTERNS: readonly RegExp[] = [
  /\binvalid config\b/i,
  /\bconfig invalid\b/i,
  /\bunrecognized key\b/i,
  /\brun:\s*openclaw doctor --fix\b/i,
];

/**
 * ANSI CSI escape sequence: ESC `[`, parameter bytes (0x30-0x3F), intermediate
 * bytes (0x20-0x2F) and one final byte (0x40-0x7E). Covers colour/style codes.
 */
// eslint-disable-next-line no-control-regex
const ANSI_CSI_SEQUENCE = /\u001b\[[0-?]*[ -/]*[@-~]/g;

/**
 * Prepare a log line for pattern matching: remove ANSI escape sequences and
 * trim surrounding whitespace.
 *
 * Stripping matters because colour codes end in a word character (`m`), so
 * `\x1b[31mInvalid config` has no word boundary before `Invalid` and would
 * defeat the `\b`-anchored patterns above.
 */
function normalizeLogLine(value: string): string {
  return value.replace(ANSI_CSI_SEQUENCE, '').trim();
}

/**
 * Returns true when text appears to indicate OpenClaw config validation failure.
 *
 * @param text A single log line or error message; may contain ANSI colour codes.
 * @returns false for blank input, otherwise whether any known phrase matches.
 */
export function isInvalidConfigSignal(text: string): boolean {
  const normalized = normalizeLogLine(text);
  if (!normalized) return false;
  return INVALID_CONFIG_PATTERNS.some((pattern) => pattern.test(normalized));
}
|
||||||
|
|
||||||
|
/**
 * Returns true when either startup stderr lines or the startup error
 * indicate an OpenClaw config validation failure.
 *
 * Stderr lines are checked first. The error is then inspected as follows:
 * - `Error` instances contribute `"<name>: <message>"`;
 * - strings are used as-is;
 * - other non-nullish values contribute their `String()` form and, when they
 *   are objects carrying a string `message` property, that message as well
 *   (plain objects otherwise stringify to the useless `[object Object]`);
 * - `null` / `undefined` contribute nothing.
 *
 * @param startupError The value thrown or rejected during startup.
 * @param startupStderrLines Stderr lines captured during startup; not mutated.
 */
export function hasInvalidConfigFailureSignal(
  startupError: unknown,
  startupStderrLines: readonly string[],
): boolean {
  if (startupStderrLines.some((line) => isInvalidConfigSignal(line))) {
    return true;
  }

  const candidates: string[] = [];
  if (startupError instanceof Error) {
    candidates.push(`${startupError.name}: ${startupError.message}`);
  } else if (typeof startupError === 'string') {
    candidates.push(startupError);
  } else if (startupError != null) {
    if (typeof startupError === 'object') {
      // Error-like values (e.g. errors that lost their prototype) keep the text in `message`.
      const message = (startupError as { message?: unknown }).message;
      if (typeof message === 'string') {
        candidates.push(message);
      }
    }
    candidates.push(String(startupError));
  }

  return candidates.some((text) => isInvalidConfigSignal(text));
}
|
||||||
|
|
||||||
|
/**
 * Decide whether a config auto-repair should be tried for this startup flow.
 *
 * A repair is attempted at most once: when `alreadyAttempted` is true the
 * failure signals are not even inspected.
 *
 * @param startupError The value thrown or rejected during startup.
 * @param startupStderrLines Stderr lines captured during startup.
 * @param alreadyAttempted Whether a repair was already tried in this flow.
 */
export function shouldAttemptConfigAutoRepair(
  startupError: unknown,
  startupStderrLines: string[],
  alreadyAttempted: boolean,
): boolean {
  return !alreadyAttempted && hasInvalidConfigFailureSignal(startupError, startupStderrLines);
}
|
||||||
|
|
||||||
52
tests/unit/gateway-startup-recovery.test.ts
Normal file
52
tests/unit/gateway-startup-recovery.test.ts
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
import { describe, expect, it } from 'vitest';
|
||||||
|
import {
|
||||||
|
hasInvalidConfigFailureSignal,
|
||||||
|
isInvalidConfigSignal,
|
||||||
|
shouldAttemptConfigAutoRepair,
|
||||||
|
} from '@electron/gateway/startup-recovery';
|
||||||
|
|
||||||
|
// Unit tests for the dependency-free startup recovery heuristics.
describe('gateway startup recovery heuristics', () => {
  it('detects invalid-config signal from stderr lines', () => {
    const startupError = new Error('gateway start failed');
    const stderr = [
      'Invalid config at C:\\Users\\pc\\.openclaw\\openclaw.json:\\n- skills: Unrecognized key: "enabled"',
      'Run: openclaw doctor --fix',
    ];

    const detected = hasInvalidConfigFailureSignal(startupError, stderr);

    expect(detected).toBe(true);
  });

  it('detects invalid-config signal from error message fallback', () => {
    // No stderr output at all: the error message alone must be enough.
    const startupError = new Error('Config invalid. Run: openclaw doctor --fix');

    const detected = hasInvalidConfigFailureSignal(startupError, []);

    expect(detected).toBe(true);
  });

  it('does not treat unrelated startup failures as invalid-config failures', () => {
    const startupError = new Error('Gateway process exited before becoming ready (code=1)');
    const stderr = [
      'Gateway process exited (code=1, expected=no)',
      'WebSocket closed before handshake',
    ];

    const detected = hasInvalidConfigFailureSignal(startupError, stderr);

    expect(detected).toBe(false);
  });

  it('attempts auto-repair only once per startup flow', () => {
    const stderr = ['Config invalid', '- skills: Unrecognized key: "enabled"'];

    const firstAttempt = shouldAttemptConfigAutoRepair(new Error('start failed'), stderr, false);
    expect(firstAttempt).toBe(true);

    const secondAttempt = shouldAttemptConfigAutoRepair(new Error('start failed'), stderr, true);
    expect(secondAttempt).toBe(false);
  });

  it('matches common invalid-config phrases robustly', () => {
    const cases: Array<[string, boolean]> = [
      ['Config invalid', true],
      ['skills: Unrecognized key: "enabled"', true],
      ['Run: openclaw doctor --fix', true],
      ['Gateway ready after 3 attempts', false],
    ];

    for (const [text, expected] of cases) {
      expect(isInvalidConfigSignal(text)).toBe(expected);
    }
  });
});
|
||||||
|
|
||||||
Reference in New Issue
Block a user