fix(gateway): add pre-sanitize + move Python check outside retry loop

Hybrid config repair approach: 1. Pre-sanitize: Add sanitizeOpenClawConfig() using a conservative blocklist approach to remove known-invalid keys (e.g. skills.enabled at root level) BEFORE starting the Gateway. Uses blocklist instead of allowlist for forward-compatibility — new valid keys added by future OpenClaw versions are never stripped. 2. Reactive fallback: The existing doctor auto-repair mechanism catches any OTHER config validation errors, runs openclaw doctor --fix, and retries once. 3. Move Python readiness check outside the while loop since it's fire-and-forget and only needs to run once per start() call. Also adds comprehensive unit tests for the sanitization logic. Co-authored-by: Haze <hazeone@users.noreply.github.com>
2026-03-01 06:17:31 +00:00
parent 19ac6afe7d
commit 75351a9a2d
3 changed files with 298 additions and 15 deletions
--- a/electron/gateway/manager.ts
+++ b/electron/gateway/manager.ts
@@ -31,7 +31,7 @@ import {
  buildDeviceAuthPayload,
  type DeviceIdentity,
 } from '../utils/device-identity';
-import { syncGatewayTokenToConfig, syncBrowserConfigToOpenClaw } from '../utils/openclaw-auth';
+import { syncGatewayTokenToConfig, syncBrowserConfigToOpenClaw, sanitizeOpenClawConfig } from '../utils/openclaw-auth';
 import { shouldAttemptConfigAutoRepair } from './startup-recovery';

 /**
@@ -292,25 +292,24 @@ export class GatewayManager extends EventEmitter {
    this.reconnectAttempts = 0;
    this.setStatus({ state: 'starting', reconnectAttempts: 0 });
    let configRepairAttempted = false;
+
+    // Check if Python environment is ready (self-healing) asynchronously.
+    // Fire-and-forget: only needs to run once, not on every retry.
+    void isPythonReady().then(pythonReady => {
+      if (!pythonReady) {
+        logger.info('Python environment missing or incomplete, attempting background repair...');
+        void setupManagedPython().catch(err => {
+          logger.error('Background Python repair failed:', err);
+        });
+      }
+    }).catch(err => {
+      logger.error('Failed to check Python environment:', err);
+    });
    
    try {
      while (true) {
        this.recentStartupStderrLines = [];
        try {
-          // Check if Python environment is ready (self-healing) asynchronously
-          void isPythonReady().then(pythonReady => {
-            if (!pythonReady) {
-              logger.info('Python environment missing or incomplete, attempting background repair...');
-              // We don't await this to avoid blocking Gateway startup,
-              // as uv run will handle it if needed, but this pre-warms it.
-              void setupManagedPython().catch(err => {
-                logger.error('Background Python repair failed:', err);
-              });
-            }
-          }).catch(err => {
-            logger.error('Failed to check Python environment:', err);
-          });
-
          // Check if Gateway is already running
          logger.debug('Checking for existing Gateway...');
          const existing = await this.findExistingGateway();
@@ -874,6 +873,17 @@ export class GatewayManager extends EventEmitter {
    // Get or generate gateway token
    const gatewayToken = await getSetting('gatewayToken');

+    // Strip stale/invalid keys from openclaw.json that would cause the
+    // Gateway's strict config validation to reject the file on startup
+    // (e.g. `skills.enabled` left by an older version).
+    // This is a fast file-based pre-check; the reactive auto-repair
+    // mechanism (runOpenClawDoctorRepair) handles any remaining issues.
+    try {
+      await sanitizeOpenClawConfig();
+    } catch (err) {
+      logger.warn('Failed to sanitize openclaw.json:', err);
+    }
+
    // Write our token into openclaw.json before starting the process.
    // Without --dev the gateway authenticates using the token in
    // openclaw.json; if that file has a stale token (e.g. left by the
--- a/electron/utils/openclaw-auth.ts
+++ b/electron/utils/openclaw-auth.ts
@@ -717,4 +717,51 @@ export async function updateAgentModelProvider(
  }
 }

+/**
+ * Sanitize ~/.openclaw/openclaw.json before Gateway start.
+ *
+ * Removes known-invalid keys that cause OpenClaw's strict Zod validation
+ * to reject the entire config on startup.  Uses a conservative **blocklist**
+ * approach: only strips keys that are KNOWN to be misplaced by older
+ * OpenClaw/ClawX versions or external tools.
+ *
+ * Why blocklist instead of allowlist?
+ *   • Allowlist (e.g. `VALID_SKILLS_KEYS`) would strip any NEW valid keys
+ *     added by future OpenClaw releases — a forward-compatibility hazard.
+ *   • Blocklist only removes keys we positively know are wrong, so new
+ *     valid keys are never touched.
+ *
+ * This is a fast, file-based pre-check.  For comprehensive repair of
+ * unknown or future config issues, the reactive auto-repair mechanism
+ * (`runOpenClawDoctorRepair`) runs `openclaw doctor --fix` as a fallback.
+ */
+export async function sanitizeOpenClawConfig(): Promise<void> {
+  const config = await readOpenClawJson();
+  let modified = false;
+
+  // ── skills section ──────────────────────────────────────────────
+  // OpenClaw's Zod schema uses .strict() on the skills object, accepting
+  // only: allowBundled, load, install, limits, entries.
+  // The key "enabled" belongs inside skills.entries[key].enabled, NOT at
+  // the skills root level.  Older versions may have placed it there.
+  const skills = config.skills;
+  if (skills && typeof skills === 'object' && !Array.isArray(skills)) {
+    const skillsObj = skills as Record<string, unknown>;
+    // Keys that are known to be invalid at the skills root level.
+    const KNOWN_INVALID_SKILLS_ROOT_KEYS = ['enabled', 'disabled'];
+    for (const key of KNOWN_INVALID_SKILLS_ROOT_KEYS) {
+      if (key in skillsObj) {
+        console.log(`[sanitize] Removing misplaced key "skills.${key}" from openclaw.json`);
+        delete skillsObj[key];
+        modified = true;
+      }
+    }
+  }
+
+  if (modified) {
+    await writeOpenClawJson(config);
+    console.log('[sanitize] openclaw.json sanitized successfully');
+  }
+}
+
 export { getProviderEnvVar } from './provider-registry';
--- a/tests/unit/sanitize-config.test.ts
+++ b/tests/unit/sanitize-config.test.ts
@@ -0,0 +1,226 @@
+/**
+ * Tests for openclaw.json config sanitization before Gateway start.
+ *
+ * The sanitizeOpenClawConfig() function in openclaw-auth.ts relies on
+ * Electron-specific helpers (readOpenClawJson / writeOpenClawJson) that
+ * read from ~/.openclaw/openclaw.json.  To avoid mocking Electron + the
+ * real HOME directory, this test uses a standalone version of the
+ * sanitization logic that mirrors the production code exactly, operating
+ * on a temp directory with real file I/O.
+ */
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import { mkdtemp, writeFile, readFile, rm } from 'fs/promises';
+import { join } from 'path';
+import { tmpdir } from 'os';
+
+let tempDir: string;
+let configPath: string;
+
+async function writeConfig(data: unknown): Promise<void> {
+  await writeFile(configPath, JSON.stringify(data, null, 2), 'utf-8');
+}
+
+async function readConfig(): Promise<Record<string, unknown>> {
+  const raw = await readFile(configPath, 'utf-8');
+  return JSON.parse(raw);
+}
+
+/**
+ * Standalone mirror of the sanitization logic in openclaw-auth.ts.
+ * Uses the same blocklist approach as the production code.
+ */
+async function sanitizeConfig(filePath: string): Promise<boolean> {
+  let raw: string;
+  try {
+    raw = await readFile(filePath, 'utf-8');
+  } catch {
+    return false;
+  }
+
+  const config = JSON.parse(raw) as Record<string, unknown>;
+  let modified = false;
+
+  // Mirror of the production blocklist logic
+  const skills = config.skills;
+  if (skills && typeof skills === 'object' && !Array.isArray(skills)) {
+    const skillsObj = skills as Record<string, unknown>;
+    const KNOWN_INVALID_SKILLS_ROOT_KEYS = ['enabled', 'disabled'];
+    for (const key of KNOWN_INVALID_SKILLS_ROOT_KEYS) {
+      if (key in skillsObj) {
+        delete skillsObj[key];
+        modified = true;
+      }
+    }
+  }
+
+  if (modified) {
+    await writeFile(filePath, JSON.stringify(config, null, 2), 'utf-8');
+  }
+  return modified;
+}
+
+beforeEach(async () => {
+  tempDir = await mkdtemp(join(tmpdir(), 'clawx-test-'));
+  configPath = join(tempDir, 'openclaw.json');
+});
+
+afterEach(async () => {
+  await rm(tempDir, { recursive: true, force: true });
+});
+
+describe('sanitizeOpenClawConfig (blocklist approach)', () => {
+  it('removes skills.enabled at the root level of skills', async () => {
+    await writeConfig({
+      skills: {
+        enabled: true,
+        entries: {
+          'my-skill': { enabled: true, apiKey: 'abc' },
+        },
+      },
+      gateway: { mode: 'local' },
+    });
+
+    const modified = await sanitizeConfig(configPath);
+    expect(modified).toBe(true);
+
+    const result = await readConfig();
+    // Root-level "enabled" should be gone
+    expect(result.skills).not.toHaveProperty('enabled');
+    // entries[key].enabled must be preserved
+    const skills = result.skills as Record<string, unknown>;
+    const entries = skills.entries as Record<string, Record<string, unknown>>;
+    expect(entries['my-skill'].enabled).toBe(true);
+    expect(entries['my-skill'].apiKey).toBe('abc');
+    // Other top-level sections are untouched
+    expect(result.gateway).toEqual({ mode: 'local' });
+  });
+
+  it('removes skills.disabled at the root level of skills', async () => {
+    await writeConfig({
+      skills: {
+        disabled: false,
+        entries: { 'x': { enabled: false } },
+      },
+    });
+
+    const modified = await sanitizeConfig(configPath);
+    expect(modified).toBe(true);
+
+    const result = await readConfig();
+    expect(result.skills).not.toHaveProperty('disabled');
+    const skills = result.skills as Record<string, unknown>;
+    const entries = skills.entries as Record<string, Record<string, unknown>>;
+    expect(entries['x'].enabled).toBe(false);
+  });
+
+  it('removes both enabled and disabled when present together', async () => {
+    await writeConfig({
+      skills: {
+        enabled: true,
+        disabled: false,
+        entries: { 'a': { enabled: true } },
+        allowBundled: ['web-search'],
+      },
+    });
+
+    const modified = await sanitizeConfig(configPath);
+    expect(modified).toBe(true);
+
+    const result = await readConfig();
+    const skills = result.skills as Record<string, unknown>;
+    expect(skills).not.toHaveProperty('enabled');
+    expect(skills).not.toHaveProperty('disabled');
+    // Valid keys are preserved
+    expect(skills.allowBundled).toEqual(['web-search']);
+    expect(skills.entries).toBeDefined();
+  });
+
+  it('does nothing when config is already valid', async () => {
+    const original = {
+      skills: {
+        entries: { 'my-skill': { enabled: true } },
+        allowBundled: ['web-search'],
+      },
+    };
+    await writeConfig(original);
+
+    const modified = await sanitizeConfig(configPath);
+    expect(modified).toBe(false);
+
+    const result = await readConfig();
+    expect(result).toEqual(original);
+  });
+
+  it('preserves unknown valid keys (forward-compatible)', async () => {
+    // If OpenClaw adds new valid keys to skills in the future,
+    // the blocklist approach should NOT strip them.
+    const original = {
+      skills: {
+        entries: { 'x': { enabled: true } },
+        allowBundled: ['web-search'],
+        load: { extraDirs: ['/my/dir'], watch: true },
+        install: { preferBrew: false },
+        limits: { maxSkillsInPrompt: 5 },
+        futureNewKey: { some: 'value' },  // hypothetical future key
+      },
+    };
+    await writeConfig(original);
+
+    const modified = await sanitizeConfig(configPath);
+    expect(modified).toBe(false);
+
+    const result = await readConfig();
+    expect(result).toEqual(original);
+  });
+
+  it('handles config with no skills section', async () => {
+    const original = { gateway: { mode: 'local' } };
+    await writeConfig(original);
+
+    const modified = await sanitizeConfig(configPath);
+    expect(modified).toBe(false);
+  });
+
+  it('handles empty config', async () => {
+    await writeConfig({});
+
+    const modified = await sanitizeConfig(configPath);
+    expect(modified).toBe(false);
+  });
+
+  it('returns false for missing config file', async () => {
+    const modified = await sanitizeConfig(join(tempDir, 'nonexistent.json'));
+    expect(modified).toBe(false);
+  });
+
+  it('handles skills being an array (no-op, no crash)', async () => {
+    // Edge case: skills is not an object
+    await writeConfig({ skills: ['something'] });
+
+    const modified = await sanitizeConfig(configPath);
+    expect(modified).toBe(false);
+  });
+
+  it('preserves all other top-level config sections', async () => {
+    await writeConfig({
+      skills: { enabled: true, entries: {} },
+      channels: { discord: { token: 'abc', enabled: true } },
+      plugins: { entries: { whatsapp: { enabled: true } } },
+      gateway: { mode: 'local', auth: { token: 'xyz' } },
+      agents: { defaults: { model: { primary: 'gpt-4' } } },
+      models: { providers: { openai: { baseUrl: 'https://api.openai.com' } } },
+    });
+
+    const modified = await sanitizeConfig(configPath);
+    expect(modified).toBe(true);
+
+    const result = await readConfig();
+    // skills.enabled removed
+    expect(result.skills).not.toHaveProperty('enabled');
+    // All other sections unchanged
+    expect(result.channels).toEqual({ discord: { token: 'abc', enabled: true } });
+    expect(result.plugins).toEqual({ entries: { whatsapp: { enabled: true } } });
+    expect(result.gateway).toEqual({ mode: 'local', auth: { token: 'xyz' } });
+    expect(result.agents).toEqual({ defaults: { model: { primary: 'gpt-4' } } });
+  });
+});