Add unit tests for skill capabilities, skill planner, and UV setup

- Implement tests for random ID generation, ensuring preference for crypto.randomUUID.
- Create tests for runtime context capabilities, validating the injection of enabled skill capabilities.
- Add tests for skill capability parsing, including classification and command example extraction.
- Introduce tests for the skill planner, verifying tool call planning based on user requests and attachment requirements.
- Establish tests for UV setup, ensuring proper handling of Python installation scenarios and environment checks.
This commit is contained in:
DEV_DSW
2026-04-24 17:02:59 +08:00
parent e11a2296cc
commit 4c61e93c3e
42 changed files with 12560 additions and 224 deletions

View File

@@ -4,10 +4,18 @@ import { beforeEach, describe, expect, it, vi } from 'vitest';
const mocks = vi.hoisted(() => {
const sessionMessages: any[] = [];
const activeRuns = new Map<string, { runId: string; abortController: AbortController }>();
return {
sessionMessages,
activeRuns,
providerChat: vi.fn(),
providerGetCapabilities: vi.fn(),
getEnabledSkillCapabilities: vi.fn(() => []),
toolRuntimeRun: vi.fn(),
createChatToolRuntime: vi.fn(() => ({
run: mocks.toolRuntimeRun,
})),
appendMessage: vi.fn((_: string, message: unknown) => {
sessionMessages.push(message);
}),
@@ -16,8 +24,13 @@ const mocks = vi.hoisted(() => {
messages: [...sessionMessages],
updatedAt: Date.now(),
})),
setActiveRun: vi.fn(),
clearActiveRun: vi.fn(),
setActiveRun: vi.fn((sessionKey: string, runId: string, abortController: AbortController) => {
activeRuns.set(sessionKey, { runId, abortController });
}),
clearActiveRun: vi.fn((sessionKey: string) => {
activeRuns.delete(sessionKey);
}),
getActiveRun: vi.fn((sessionKey: string) => activeRuns.get(sessionKey)),
appendTranscriptLine: vi.fn(),
maybeHandleBrowserOpenMessage: vi.fn(() => false),
maybeHandleSkillInstallMessage: vi.fn(() => false),
@@ -30,6 +43,7 @@ const mocks = vi.hoisted(() => {
// Stub the provider factory so each test controls chat streaming and the
// capability flags the handler reads.
vi.mock('@electron/providers', () => {
return {
createProvider: vi.fn(() => {
return {
chat: mocks.providerChat,
getCapabilities: mocks.providerGetCapabilities,
};
}),
};
});
@@ -53,6 +67,7 @@ vi.mock('../electron/gateway/session-store', () => ({
getOrCreate: mocks.getOrCreate,
setActiveRun: mocks.setActiveRun,
clearActiveRun: mocks.clearActiveRun,
getActiveRun: mocks.getActiveRun,
},
}));
@@ -60,6 +75,18 @@ vi.mock('@electron/utils/token-usage-writer', () => ({
appendTranscriptLine: mocks.appendTranscriptLine,
}));
// Replace the skill registry so tests decide which capabilities are enabled.
vi.mock('../electron/gateway/skill-capability-registry', () => {
return { getEnabledSkillCapabilities: mocks.getEnabledSkillCapabilities };
});
// Partial mock: keep every real chat-tooling export but swap in the mocked
// tool-runtime factory so tests can observe and script tool executions.
vi.mock('../electron/gateway/chat-tooling', async () => {
const realModule = await vi.importActual<typeof import('../electron/gateway/chat-tooling')>('../electron/gateway/chat-tooling');
return { ...realModule, createChatToolRuntime: mocks.createChatToolRuntime };
});
// Stub the browser-open shortcut; beforeEach makes it decline by default so
// messages flow through the normal chat path.
vi.mock('../electron/gateway/browser-shortcut', () => {
return { maybeHandleBrowserOpenMessage: mocks.maybeHandleBrowserOpenMessage };
});
@@ -82,17 +109,62 @@ function createStream(chunks: Array<{ result?: string; usage?: unknown }>) {
};
}
function flushAsyncTasks(): Promise<void> {
return new Promise((resolve) => setTimeout(resolve, 0));
/**
 * Resolves after the given number of macrotask turns, giving queued timers
 * and promise continuations a chance to run between test assertions.
 * With `iterations <= 0` the promise resolves without scheduling any timer.
 */
function flushAsyncTasks(iterations = 1): Promise<void> {
return new Promise((resolve) => {
let remaining = iterations;
const tick = () => {
if (remaining <= 0) {
resolve();
return;
}
remaining -= 1;
setTimeout(tick, 0);
};
tick();
});
}
// Fixture: an enabled spreadsheet-analysis skill capability. Tests return it
// from getEnabledSkillCapabilities to exercise the planner-first tool path.
const spreadsheetCapability = {
skillKey: 'minimax-xlsx',
slug: 'minimax-xlsx',
name: 'MiniMax XLSX',
description: 'Analyze spreadsheet files such as .xlsx and .csv.',
enabled: true,
category: 'document',
allowedTools: [],
operationHints: ['read', 'analyze'],
triggerHints: ['spreadsheet', 'excel'],
// presumably used to match attachments by file extension — confirm in planner
inputExtensions: ['.xlsx', '.csv', '.tsv'],
requiredEnvVars: [],
requiresAuth: false,
plannerSummary: 'document skill; operations: read, analyze; inputs: .xlsx, .csv, .tsv',
// Hints echoed back in tool results; the big planner test asserts this shape.
renderHints: {
card: 'document-analysis',
preferredView: 'table',
skillType: 'spreadsheet',
},
};
describe('chat runtime context', () => {
// Reset shared state between tests. Note the ordering: clearAllMocks wipes
// mock implementations, so all default stubs must be (re)installed after it.
beforeEach(() => {
vi.resetModules();
vi.clearAllMocks();
// Empty the in-memory session transcript and active-run registry.
mocks.sessionMessages.length = 0;
mocks.activeRuns.clear();
// Shortcut handlers decline by default so messages reach the chat pipeline.
mocks.maybeHandleBrowserOpenMessage.mockReturnValue(false);
mocks.maybeHandleSkillInstallMessage.mockReturnValue(false);
// No skills enabled unless a test opts in.
mocks.getEnabledSkillCapabilities.mockReturnValue([]);
// Provider streams a single terminal chunk by default.
mocks.providerChat.mockResolvedValue(createStream([{ result: 'done' }]));
// All provider capabilities off by default; tests flip them as needed.
mocks.providerGetCapabilities.mockReturnValue({
structuredMessages: false,
toolCalls: false,
toolResults: false,
thinking: false,
});
// Re-wire the tool runtime factory, since clearAllMocks removed the
// implementation configured in the hoisted mock block.
mocks.toolRuntimeRun.mockReset();
mocks.createChatToolRuntime.mockImplementation(() => ({
run: mocks.toolRuntimeRun,
}));
});
it('prepends the zn-ai runtime context before provider chat runs', async () => {
@@ -134,4 +206,212 @@ describe('chat runtime context', () => {
content: expect.stringContaining('skills.install'),
});
});
// End-to-end planner test: with one enabled spreadsheet skill and an .xlsx
// attachment, a chat send should persist tool_use -> tool_result -> final
// assistant messages and broadcast matching tool:status / chat:final events.
it('persists tool_use -> tool_result -> final for planner-first spreadsheet execution', async () => {
// Arrange: enable the fixture skill and have the provider return the final answer.
mocks.getEnabledSkillCapabilities.mockReturnValue([spreadsheetCapability]);
mocks.providerChat.mockResolvedValue(createStream([{ result: 'Final answer from provider.' }]));
// Tool runtime stub: returns the full preflight/execution/normalized result
// shape for a successful spreadsheet analysis, echoing the invocation ids.
mocks.toolRuntimeRun.mockImplementation(async (invocation: { toolCallId: string; toolName: string; input: unknown }) => {
const payload = {
ok: true,
summary: 'Spreadsheet analysis completed.',
structuredData: {
reports: [{ filePath: 'C:\\tmp\\report.xlsx', rows: 3 }],
},
renderHints: {
card: 'document-analysis',
preferredView: 'table',
skillType: 'spreadsheet',
},
raw: {
reports: [{ filePath: 'C:\\tmp\\report.xlsx', rows: 3 }],
},
};
return {
preflight: {
ok: true,
status: 'ready',
toolCallId: invocation.toolCallId,
toolName: invocation.toolName,
normalizedInput: invocation.input,
summary: 'Ready to analyze the spreadsheet.',
},
execution: {
ok: true,
status: 'completed',
toolCallId: invocation.toolCallId,
toolName: invocation.toolName,
normalizedInput: invocation.input,
summary: 'Spreadsheet analysis completed.',
raw: payload.raw,
durationMs: 12,
},
normalized: {
ok: true,
status: 'completed',
toolCallId: invocation.toolCallId,
toolName: invocation.toolName,
summary: 'Spreadsheet analysis completed.',
payload,
block: {
type: 'tool_result',
toolCallId: invocation.toolCallId,
content: 'Spreadsheet analysis completed.',
result: payload,
summary: 'Spreadsheet analysis completed.',
ok: true,
},
transcriptMessage: {
role: 'tool_result',
content: [
{
type: 'tool_result',
toolCallId: invocation.toolCallId,
content: 'Spreadsheet analysis completed.',
result: payload,
summary: 'Spreadsheet analysis completed.',
ok: true,
},
],
timestamp: Date.now(),
toolCallId: invocation.toolCallId,
toolName: invocation.toolName,
toolCall: {
id: invocation.toolCallId,
name: invocation.toolName,
input: invocation.input,
summary: 'Spreadsheet analysis completed.',
},
toolResult: payload,
},
},
};
});
// Act: import fresh (resetModules ran in beforeEach) and send a user message
// with an attached spreadsheet file.
const { handleChatSend } = await import('../electron/gateway/handlers/chat');
const broadcast = vi.fn();
const result = handleChatSend(
{
sessionKey: 'agent:test:main',
message: {
role: 'user',
content: 'Use minimax-xlsx to analyze this spreadsheet.',
_attachedFiles: [
{
fileName: 'report.xlsx',
mimeType: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
fileSize: 2048,
preview: null,
filePath: 'C:\\tmp\\report.xlsx',
source: 'user-upload',
},
],
},
},
broadcast,
);
// Synchronous part: the user message plus an assistant tool_use are persisted
// before any async work completes.
expect(result.runId).toBeTypeOf('string');
expect(mocks.sessionMessages).toHaveLength(2);
expect(mocks.sessionMessages[1]).toEqual(expect.objectContaining({
role: 'assistant',
toolName: 'minimax-xlsx',
content: [
expect.objectContaining({
type: 'tool_use',
name: 'minimax-xlsx',
}),
],
}));
// Let the planner -> tool -> provider chain run (4 macrotask turns).
await flushAsyncTasks(4);
// The tool runtime was invoked by the planner with session context and the
// attached file forwarded.
expect(mocks.toolRuntimeRun).toHaveBeenCalledWith(
expect.objectContaining({
toolName: 'minimax-xlsx',
source: 'planner',
}),
expect.objectContaining({
sessionKey: 'agent:test:main',
runId: result.runId,
files: expect.arrayContaining([
expect.objectContaining({
filePath: 'C:\\tmp\\report.xlsx',
}),
]),
}),
);
// The provider is called once, after the tool ran, with the skill context in
// the system prompt and the tool_use / tool_result turns in the history.
expect(mocks.providerChat).toHaveBeenCalledTimes(1);
const [messages, model] = mocks.providerChat.mock.calls[0] ?? [];
expect(model).toBe('gpt-4o-mini');
expect(messages).toEqual(expect.arrayContaining([
expect.objectContaining({
role: 'system',
content: expect.stringContaining('minimax-xlsx'),
}),
expect.objectContaining({
role: 'assistant',
content: [
expect.objectContaining({
type: 'tool_use',
name: 'minimax-xlsx',
}),
],
}),
expect.objectContaining({
role: 'tool_result',
content: [
expect.objectContaining({
type: 'tool_result',
summary: 'Spreadsheet analysis completed.',
}),
],
}),
]));
// Persisted transcript order: user -> tool_use -> tool_result -> final answer.
expect(mocks.sessionMessages.map((message) => message.role)).toEqual([
'user',
'assistant',
'tool_result',
'assistant',
]);
expect(mocks.sessionMessages[2]).toEqual(expect.objectContaining({
role: 'tool_result',
toolName: 'minimax-xlsx',
toolResult: expect.objectContaining({
summary: 'Spreadsheet analysis completed.',
}),
_toolStatuses: [
expect.objectContaining({
name: 'minimax-xlsx',
status: 'completed',
summary: 'Spreadsheet analysis completed.',
}),
],
}));
expect(mocks.sessionMessages[3]).toEqual(expect.objectContaining({
role: 'assistant',
content: 'Final answer from provider.',
}));
// Broadcasts: running then completed tool status, with chat:final last.
expect(broadcast).toHaveBeenCalledWith(expect.objectContaining({
type: 'tool:status',
toolName: 'minimax-xlsx',
status: 'running',
}));
expect(broadcast).toHaveBeenCalledWith(expect.objectContaining({
type: 'tool:status',
toolName: 'minimax-xlsx',
status: 'completed',
}));
expect(broadcast).toHaveBeenLastCalledWith(expect.objectContaining({
type: 'chat:final',
runId: result.runId,
message: expect.objectContaining({
content: 'Final answer from provider.',
}),
}));
});
});