fix(gateway): add pre-sanitize + move Python check outside retry loop

Hybrid config repair approach:

1. Pre-sanitize: Add `sanitizeOpenClawConfig()` using a conservative blocklist approach to remove known-invalid keys (e.g. `skills.enabled` at root level) BEFORE starting the Gateway. Uses a blocklist instead of an allowlist for forward-compatibility — new valid keys added by future OpenClaw versions are never stripped.
2. Reactive fallback: The existing doctor auto-repair mechanism catches any OTHER config validation errors, runs `openclaw doctor --fix`, and retries once.
3. Move the Python readiness check outside the while loop, since it's fire-and-forget and only needs to run once per `start()` call.

Also adds comprehensive unit tests for the sanitization logic.

Co-authored-by: Haze <hazeone@users.noreply.github.com>
2026-03-01 06:17:31 +00:00
parent 19ac6afe7d
commit 75351a9a2d
3 changed files with 298 additions and 15 deletions
--- a/electron/gateway/manager.ts
+++ b/electron/gateway/manager.ts
@@ -31,7 +31,7 @@ import {
  buildDeviceAuthPayload,
  type DeviceIdentity,
 } from '../utils/device-identity';
-import { syncGatewayTokenToConfig, syncBrowserConfigToOpenClaw } from '../utils/openclaw-auth';
+import { syncGatewayTokenToConfig, syncBrowserConfigToOpenClaw, sanitizeOpenClawConfig } from '../utils/openclaw-auth';
 import { shouldAttemptConfigAutoRepair } from './startup-recovery';

 /**
@@ -292,25 +292,24 @@ export class GatewayManager extends EventEmitter {
    this.reconnectAttempts = 0;
    this.setStatus({ state: 'starting', reconnectAttempts: 0 });
    let configRepairAttempted = false;
+
+    // Check if Python environment is ready (self-healing) asynchronously.
+    // Fire-and-forget: only needs to run once, not on every retry.
+    void isPythonReady().then(pythonReady => {
+      if (!pythonReady) {
+        logger.info('Python environment missing or incomplete, attempting background repair...');
+        void setupManagedPython().catch(err => {
+          logger.error('Background Python repair failed:', err);
+        });
+      }
+    }).catch(err => {
+      logger.error('Failed to check Python environment:', err);
+    });
    
    try {
      while (true) {
        this.recentStartupStderrLines = [];
        try {
-          // Check if Python environment is ready (self-healing) asynchronously
-          void isPythonReady().then(pythonReady => {
-            if (!pythonReady) {
-              logger.info('Python environment missing or incomplete, attempting background repair...');
-              // We don't await this to avoid blocking Gateway startup,
-              // as uv run will handle it if needed, but this pre-warms it.
-              void setupManagedPython().catch(err => {
-                logger.error('Background Python repair failed:', err);
-              });
-            }
-          }).catch(err => {
-            logger.error('Failed to check Python environment:', err);
-          });
-
          // Check if Gateway is already running
          logger.debug('Checking for existing Gateway...');
          const existing = await this.findExistingGateway();
@@ -874,6 +873,17 @@ export class GatewayManager extends EventEmitter {
    // Get or generate gateway token
    const gatewayToken = await getSetting('gatewayToken');

+    // Strip stale/invalid keys from openclaw.json that would cause the
+    // Gateway's strict config validation to reject the file on startup
+    // (e.g. `skills.enabled` left by an older version).
+    // This is a fast file-based pre-check; the reactive auto-repair
+    // mechanism (runOpenClawDoctorRepair) handles any remaining issues.
+    try {
+      await sanitizeOpenClawConfig();
+    } catch (err) {
+      logger.warn('Failed to sanitize openclaw.json:', err);
+    }
+
    // Write our token into openclaw.json before starting the process.
    // Without --dev the gateway authenticates using the token in
    // openclaw.json; if that file has a stale token (e.g. left by the