Add unit tests for skill capabilities, skill planner, and UV setup

- Implement tests for random ID generation, ensuring preference for crypto.randomUUID.
- Create tests for runtime context capabilities, validating the injection of enabled skill capabilities.
- Add tests for skill capability parsing, including classification and command example extraction.
- Introduce tests for the skill planner, verifying tool call planning based on user requests and attachment requirements.
- Establish tests for UV setup, ensuring proper handling of Python installation scenarios and environment checks.
This commit is contained in:
DEV_DSW
2026-04-24 17:02:59 +08:00
parent e11a2296cc
commit 4c61e93c3e
42 changed files with 12560 additions and 224 deletions

View File

@@ -4,10 +4,18 @@ import { beforeEach, describe, expect, it, vi } from 'vitest';
const mocks = vi.hoisted(() => {
const sessionMessages: any[] = [];
const activeRuns = new Map<string, { runId: string; abortController: AbortController }>();
return {
sessionMessages,
activeRuns,
providerChat: vi.fn(),
providerGetCapabilities: vi.fn(),
getEnabledSkillCapabilities: vi.fn(() => []),
toolRuntimeRun: vi.fn(),
createChatToolRuntime: vi.fn(() => ({
run: mocks.toolRuntimeRun,
})),
appendMessage: vi.fn((_: string, message: unknown) => {
sessionMessages.push(message);
}),
@@ -16,8 +24,13 @@ const mocks = vi.hoisted(() => {
messages: [...sessionMessages],
updatedAt: Date.now(),
})),
setActiveRun: vi.fn(),
clearActiveRun: vi.fn(),
setActiveRun: vi.fn((sessionKey: string, runId: string, abortController: AbortController) => {
activeRuns.set(sessionKey, { runId, abortController });
}),
clearActiveRun: vi.fn((sessionKey: string) => {
activeRuns.delete(sessionKey);
}),
getActiveRun: vi.fn((sessionKey: string) => activeRuns.get(sessionKey)),
appendTranscriptLine: vi.fn(),
maybeHandleBrowserOpenMessage: vi.fn(() => false),
maybeHandleSkillInstallMessage: vi.fn(() => false),
@@ -30,6 +43,7 @@ const mocks = vi.hoisted(() => {
// Stub the provider factory so each test controls chat streaming and the
// capability flags the handler reads.
vi.mock('@electron/providers', () => {
return {
createProvider: vi.fn(() => {
return {
chat: mocks.providerChat,
getCapabilities: mocks.providerGetCapabilities,
};
}),
};
});
@@ -53,6 +67,7 @@ vi.mock('../electron/gateway/session-store', () => ({
getOrCreate: mocks.getOrCreate,
setActiveRun: mocks.setActiveRun,
clearActiveRun: mocks.clearActiveRun,
getActiveRun: mocks.getActiveRun,
},
}));
@@ -60,6 +75,18 @@ vi.mock('@electron/utils/token-usage-writer', () => ({
appendTranscriptLine: mocks.appendTranscriptLine,
}));
// Replace the skill registry so tests decide which capabilities are enabled.
vi.mock('../electron/gateway/skill-capability-registry', () => {
return { getEnabledSkillCapabilities: mocks.getEnabledSkillCapabilities };
});
// Partial mock: keep every real chat-tooling export but swap in the mocked
// tool-runtime factory so tests can observe and script tool executions.
vi.mock('../electron/gateway/chat-tooling', async () => {
const realModule = await vi.importActual<typeof import('../electron/gateway/chat-tooling')>('../electron/gateway/chat-tooling');
return { ...realModule, createChatToolRuntime: mocks.createChatToolRuntime };
});
// Stub the browser-open shortcut; beforeEach makes it decline by default so
// messages flow through the normal chat path.
vi.mock('../electron/gateway/browser-shortcut', () => {
return { maybeHandleBrowserOpenMessage: mocks.maybeHandleBrowserOpenMessage };
});
@@ -82,17 +109,62 @@ function createStream(chunks: Array<{ result?: string; usage?: unknown }>) {
};
}
function flushAsyncTasks(): Promise<void> {
return new Promise((resolve) => setTimeout(resolve, 0));
/**
 * Resolves after the given number of macrotask turns, giving queued timers
 * and promise continuations a chance to run between test assertions.
 * With `iterations <= 0` the promise resolves without scheduling any timer.
 */
function flushAsyncTasks(iterations = 1): Promise<void> {
return new Promise((resolve) => {
let remaining = iterations;
const tick = () => {
if (remaining <= 0) {
resolve();
return;
}
remaining -= 1;
setTimeout(tick, 0);
};
tick();
});
}
// Fixture: an enabled spreadsheet-analysis skill capability. Tests return it
// from getEnabledSkillCapabilities to exercise the planner-first tool path.
const spreadsheetCapability = {
skillKey: 'minimax-xlsx',
slug: 'minimax-xlsx',
name: 'MiniMax XLSX',
description: 'Analyze spreadsheet files such as .xlsx and .csv.',
enabled: true,
category: 'document',
allowedTools: [],
operationHints: ['read', 'analyze'],
triggerHints: ['spreadsheet', 'excel'],
// presumably used to match attachments by file extension — confirm in planner
inputExtensions: ['.xlsx', '.csv', '.tsv'],
requiredEnvVars: [],
requiresAuth: false,
plannerSummary: 'document skill; operations: read, analyze; inputs: .xlsx, .csv, .tsv',
// Hints echoed back in tool results; the big planner test asserts this shape.
renderHints: {
card: 'document-analysis',
preferredView: 'table',
skillType: 'spreadsheet',
},
};
describe('chat runtime context', () => {
// Reset shared state between tests. Note the ordering: clearAllMocks wipes
// mock implementations, so all default stubs must be (re)installed after it.
beforeEach(() => {
vi.resetModules();
vi.clearAllMocks();
// Empty the in-memory session transcript and active-run registry.
mocks.sessionMessages.length = 0;
mocks.activeRuns.clear();
// Shortcut handlers decline by default so messages reach the chat pipeline.
mocks.maybeHandleBrowserOpenMessage.mockReturnValue(false);
mocks.maybeHandleSkillInstallMessage.mockReturnValue(false);
// No skills enabled unless a test opts in.
mocks.getEnabledSkillCapabilities.mockReturnValue([]);
// Provider streams a single terminal chunk by default.
mocks.providerChat.mockResolvedValue(createStream([{ result: 'done' }]));
// All provider capabilities off by default; tests flip them as needed.
mocks.providerGetCapabilities.mockReturnValue({
structuredMessages: false,
toolCalls: false,
toolResults: false,
thinking: false,
});
// Re-wire the tool runtime factory, since clearAllMocks removed the
// implementation configured in the hoisted mock block.
mocks.toolRuntimeRun.mockReset();
mocks.createChatToolRuntime.mockImplementation(() => ({
run: mocks.toolRuntimeRun,
}));
});
it('prepends the zn-ai runtime context before provider chat runs', async () => {
@@ -134,4 +206,212 @@ describe('chat runtime context', () => {
content: expect.stringContaining('skills.install'),
});
});
// End-to-end planner test: with one enabled spreadsheet skill and an .xlsx
// attachment, a chat send should persist tool_use -> tool_result -> final
// assistant messages and broadcast matching tool:status / chat:final events.
it('persists tool_use -> tool_result -> final for planner-first spreadsheet execution', async () => {
// Arrange: enable the fixture skill and have the provider return the final answer.
mocks.getEnabledSkillCapabilities.mockReturnValue([spreadsheetCapability]);
mocks.providerChat.mockResolvedValue(createStream([{ result: 'Final answer from provider.' }]));
// Tool runtime stub: returns the full preflight/execution/normalized result
// shape for a successful spreadsheet analysis, echoing the invocation ids.
mocks.toolRuntimeRun.mockImplementation(async (invocation: { toolCallId: string; toolName: string; input: unknown }) => {
const payload = {
ok: true,
summary: 'Spreadsheet analysis completed.',
structuredData: {
reports: [{ filePath: 'C:\\tmp\\report.xlsx', rows: 3 }],
},
renderHints: {
card: 'document-analysis',
preferredView: 'table',
skillType: 'spreadsheet',
},
raw: {
reports: [{ filePath: 'C:\\tmp\\report.xlsx', rows: 3 }],
},
};
return {
preflight: {
ok: true,
status: 'ready',
toolCallId: invocation.toolCallId,
toolName: invocation.toolName,
normalizedInput: invocation.input,
summary: 'Ready to analyze the spreadsheet.',
},
execution: {
ok: true,
status: 'completed',
toolCallId: invocation.toolCallId,
toolName: invocation.toolName,
normalizedInput: invocation.input,
summary: 'Spreadsheet analysis completed.',
raw: payload.raw,
durationMs: 12,
},
normalized: {
ok: true,
status: 'completed',
toolCallId: invocation.toolCallId,
toolName: invocation.toolName,
summary: 'Spreadsheet analysis completed.',
payload,
block: {
type: 'tool_result',
toolCallId: invocation.toolCallId,
content: 'Spreadsheet analysis completed.',
result: payload,
summary: 'Spreadsheet analysis completed.',
ok: true,
},
transcriptMessage: {
role: 'tool_result',
content: [
{
type: 'tool_result',
toolCallId: invocation.toolCallId,
content: 'Spreadsheet analysis completed.',
result: payload,
summary: 'Spreadsheet analysis completed.',
ok: true,
},
],
timestamp: Date.now(),
toolCallId: invocation.toolCallId,
toolName: invocation.toolName,
toolCall: {
id: invocation.toolCallId,
name: invocation.toolName,
input: invocation.input,
summary: 'Spreadsheet analysis completed.',
},
toolResult: payload,
},
},
};
});
// Act: import fresh (resetModules ran in beforeEach) and send a user message
// with an attached spreadsheet file.
const { handleChatSend } = await import('../electron/gateway/handlers/chat');
const broadcast = vi.fn();
const result = handleChatSend(
{
sessionKey: 'agent:test:main',
message: {
role: 'user',
content: 'Use minimax-xlsx to analyze this spreadsheet.',
_attachedFiles: [
{
fileName: 'report.xlsx',
mimeType: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
fileSize: 2048,
preview: null,
filePath: 'C:\\tmp\\report.xlsx',
source: 'user-upload',
},
],
},
},
broadcast,
);
// Synchronous part: the user message plus an assistant tool_use are persisted
// before any async work completes.
expect(result.runId).toBeTypeOf('string');
expect(mocks.sessionMessages).toHaveLength(2);
expect(mocks.sessionMessages[1]).toEqual(expect.objectContaining({
role: 'assistant',
toolName: 'minimax-xlsx',
content: [
expect.objectContaining({
type: 'tool_use',
name: 'minimax-xlsx',
}),
],
}));
// Let the planner -> tool -> provider chain run (4 macrotask turns).
await flushAsyncTasks(4);
// The tool runtime was invoked by the planner with session context and the
// attached file forwarded.
expect(mocks.toolRuntimeRun).toHaveBeenCalledWith(
expect.objectContaining({
toolName: 'minimax-xlsx',
source: 'planner',
}),
expect.objectContaining({
sessionKey: 'agent:test:main',
runId: result.runId,
files: expect.arrayContaining([
expect.objectContaining({
filePath: 'C:\\tmp\\report.xlsx',
}),
]),
}),
);
// The provider is called once, after the tool ran, with the skill context in
// the system prompt and the tool_use / tool_result turns in the history.
expect(mocks.providerChat).toHaveBeenCalledTimes(1);
const [messages, model] = mocks.providerChat.mock.calls[0] ?? [];
expect(model).toBe('gpt-4o-mini');
expect(messages).toEqual(expect.arrayContaining([
expect.objectContaining({
role: 'system',
content: expect.stringContaining('minimax-xlsx'),
}),
expect.objectContaining({
role: 'assistant',
content: [
expect.objectContaining({
type: 'tool_use',
name: 'minimax-xlsx',
}),
],
}),
expect.objectContaining({
role: 'tool_result',
content: [
expect.objectContaining({
type: 'tool_result',
summary: 'Spreadsheet analysis completed.',
}),
],
}),
]));
// Persisted transcript order: user -> tool_use -> tool_result -> final answer.
expect(mocks.sessionMessages.map((message) => message.role)).toEqual([
'user',
'assistant',
'tool_result',
'assistant',
]);
expect(mocks.sessionMessages[2]).toEqual(expect.objectContaining({
role: 'tool_result',
toolName: 'minimax-xlsx',
toolResult: expect.objectContaining({
summary: 'Spreadsheet analysis completed.',
}),
_toolStatuses: [
expect.objectContaining({
name: 'minimax-xlsx',
status: 'completed',
summary: 'Spreadsheet analysis completed.',
}),
],
}));
expect(mocks.sessionMessages[3]).toEqual(expect.objectContaining({
role: 'assistant',
content: 'Final answer from provider.',
}));
// Broadcasts: running then completed tool status, with chat:final last.
expect(broadcast).toHaveBeenCalledWith(expect.objectContaining({
type: 'tool:status',
toolName: 'minimax-xlsx',
status: 'running',
}));
expect(broadcast).toHaveBeenCalledWith(expect.objectContaining({
type: 'tool:status',
toolName: 'minimax-xlsx',
status: 'completed',
}));
expect(broadcast).toHaveBeenLastCalledWith(expect.objectContaining({
type: 'chat:final',
runId: result.runId,
message: expect.objectContaining({
content: 'Final answer from provider.',
}),
}));
});
});