feat: prepare Zhinian desktop client for pilot release
This commit is contained in:
271
electron/api/routes/knowledge.ts
Normal file
271
electron/api/routes/knowledge.ts
Normal file
@@ -0,0 +1,271 @@
|
||||
import type { IncomingMessage, ServerResponse } from 'node:http';
import crypto from 'node:crypto';
import { basename, extname, join } from 'node:path';
import { mkdir, readFile, rm, stat, writeFile, copyFile } from 'node:fs/promises';
import type { HostApiContext } from '../context';
import { parseJsonBody, sendJson } from '../route-utils';
import { getDataDir } from '../../utils/paths';
|
||||
|
||||
/** Directory under the application data dir that holds one sub-folder per workspace. */
const KNOWLEDGE_ROOT = join(getDataDir(), 'yinian', 'knowledge');

/** Largest file (in bytes) accepted by an import: 20 MiB. */
const MAX_KNOWLEDGE_FILE_BYTES = 20 * 1024 ** 2;

/** Maximum number of characters (UTF-16 code units) taken from a single document when building context. */
const MAX_CONTEXT_CHARS_PER_FILE = 32_000;

/** Maximum number of document characters (UTF-16 code units) across one generated context. */
const MAX_CONTEXT_TOTAL_CHARS = 96_000;
|
||||
|
||||
/**
 * MIME type for every plain-text file extension that may be imported.
 * Keys are lower-case extensions including the leading dot.
 */
const TEXT_MIME_BY_EXT: Record<string, string> = {
  // Plain text and Markdown
  '.txt': 'text/plain',
  '.md': 'text/markdown',
  '.markdown': 'text/markdown',

  // Delimited tables
  '.csv': 'text/csv',
  '.tsv': 'text/tab-separated-values',

  // Structured data and markup
  '.json': 'application/json',
  '.jsonl': 'application/x-ndjson',
  '.xml': 'application/xml',
  '.html': 'text/html',
  '.htm': 'text/html',
  '.yaml': 'application/yaml',
  '.yml': 'application/yaml',

  // Logs and configuration
  '.log': 'text/plain',
  '.ini': 'text/plain',
  '.conf': 'text/plain',

  // Source code
  '.css': 'text/css',
  '.js': 'text/javascript',
  '.jsx': 'text/javascript',
  '.ts': 'text/typescript',
  '.tsx': 'text/typescript',
  '.py': 'text/x-python',
  '.sql': 'application/sql',
};
|
||||
|
||||
/**
 * MIME type for the supported Word document extensions.
 * Keys are lower-case extensions including the leading dot.
 */
const WORD_MIME_BY_EXT: Record<string, string> = {
  '.docx':
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
};
|
||||
|
||||
/** One entry of a workspace's knowledge registry (`registry.json`). */
export interface KnowledgeDocument {
  /** Unique identifier; the importer generates it with `crypto.randomUUID()`. */
  id: string;
  /** Sanitized id of the workspace the document was imported into. */
  workspaceId: string;
  /** Base name of the imported file. */
  name: string;
  /** MIME type derived from the file extension. */
  mimeType: string;
  /** Size of the source file in bytes at import time. */
  size: number;
  /** Path of the copy kept in the workspace's `files` directory. */
  storedPath: string;
  /** Path of the extracted plain text; only set for documents that needed extraction (.docx). */
  textPath?: string;
  /** Path the file was imported from, as supplied by the caller. */
  originalPath?: string;
  /** Import time as returned by `Date.now()` (milliseconds since the Unix epoch). */
  importedAt: number;
  /** Lifecycle state; `'stored'` is the only value currently defined. */
  status: 'stored';
}
|
||||
|
||||
/**
 * Turns an arbitrary workspace id into a string that is safe to use as a
 * directory name.
 *
 * The id is trimmed, every character outside `[a-zA-Z0-9_-]` is replaced by
 * `_`, and the result is capped at 96 characters. A missing or blank id maps
 * to `'default'`.
 *
 * @param workspaceId - Raw workspace id, possibly undefined.
 * @returns The sanitized id, never empty.
 */
function sanitizeWorkspaceId(workspaceId?: string): string {
  const fallback = 'default';
  const trimmed = workspaceId?.trim();
  const candidate = trimmed ? trimmed : fallback;
  const safe = candidate.replace(/[^a-zA-Z0-9_-]/g, '_').slice(0, 96);
  return safe === '' ? fallback : safe;
}
|
||||
|
||||
/**
 * Resolves the storage directory of a workspace.
 *
 * @param workspaceId - Raw workspace id; it is sanitized before use.
 * @returns Path of the workspace folder below `KNOWLEDGE_ROOT`.
 */
function getWorkspaceDir(workspaceId?: string): string {
  const folderName = sanitizeWorkspaceId(workspaceId);
  return join(KNOWLEDGE_ROOT, folderName);
}
|
||||
|
||||
/**
 * Resolves the location of a workspace's document registry.
 *
 * @param workspaceId - Raw workspace id; it is sanitized before use.
 * @returns Path of `registry.json` inside the workspace directory.
 */
function getRegistryPath(workspaceId?: string): string {
  const workspaceDir = getWorkspaceDir(workspaceId);
  return join(workspaceDir, 'registry.json');
}
|
||||
|
||||
/**
 * Loads the document registry of a workspace.
 *
 * This is deliberately best-effort: a registry that is missing, unreadable,
 * empty, not valid JSON, or not a JSON array yields an empty list instead of
 * an error. Array entries that are not objects carrying a string `id` are
 * dropped, so callers can safely dereference every returned document.
 *
 * @param workspaceId - Raw workspace id; it is sanitized when the path is resolved.
 * @returns The registered documents, in the order stored on disk.
 */
async function readRegistry(workspaceId?: string): Promise<KnowledgeDocument[]> {
  // Any read failure (most commonly "file does not exist yet") counts as an empty registry.
  const raw = await readFile(getRegistryPath(workspaceId), 'utf8').catch(() => '');
  if (!raw.trim()) return [];

  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return [];
  }
  if (!Array.isArray(parsed)) return [];

  // The file is external input: keep only entries that at least look like
  // documents. Without this, a `null` entry would crash consumers on `doc.id`.
  return parsed.filter(
    (entry): entry is KnowledgeDocument =>
      typeof entry === 'object' &&
      entry !== null &&
      typeof (entry as { id?: unknown }).id === 'string',
  );
}
|
||||
|
||||
/**
 * Persists the document registry of a workspace, creating the workspace
 * directory first when necessary.
 *
 * @param workspaceId - Raw workspace id; it is sanitized when paths are resolved.
 * @param docs - Complete list of documents to store; replaces the previous file content.
 */
async function writeRegistry(workspaceId: string | undefined, docs: KnowledgeDocument[]): Promise<void> {
  const workspaceDir = getWorkspaceDir(workspaceId);
  await mkdir(workspaceDir, { recursive: true });

  const registryFile = getRegistryPath(workspaceId);
  const serialized = JSON.stringify(docs, null, 2);
  await writeFile(registryFile, serialized, 'utf8');
}
|
||||
|
||||
/**
 * Looks up the MIME type of a file by its extension (case-insensitive).
 *
 * The plain-text table is consulted first, then the Word table.
 *
 * @param filePath - Path or file name to classify.
 * @returns The MIME type, or `null` when the extension is not supported.
 */
function getTextMimeType(filePath: string): string | null {
  const extension = extname(filePath).toLowerCase();
  for (const table of [TEXT_MIME_BY_EXT, WORD_MIME_BY_EXT]) {
    const mime = table[extension];
    if (mime != null) return mime;
  }
  return null;
}
|
||||
|
||||
/**
 * Tells whether a path has the `.docx` extension, ignoring case.
 *
 * @param filePath - Path or file name to inspect.
 * @returns `true` when the final extension is `.docx`.
 */
function isDocx(filePath: string): boolean {
  const extension = extname(filePath);
  return extension.toLowerCase() === '.docx';
}
|
||||
|
||||
/**
 * Extracts the raw text of a Word document using `mammoth`.
 *
 * The library is imported dynamically, so it is only loaded when a Word
 * document is actually processed.
 *
 * @param filePath - Path of the `.docx` file to read.
 * @returns The extracted text with surrounding whitespace removed.
 */
async function extractDocxText(filePath: string): Promise<string> {
  const mammoth = await import('mammoth');
  const { value } = await mammoth.extractRawText({ path: filePath });
  return value.trim();
}
|
||||
|
||||
/** Deletes a file created during a failed import; failures are ignored because this is cleanup only. */
async function removeImportArtifact(path: string): Promise<void> {
  await rm(path, { force: true }).catch(() => undefined);
}

/**
 * Copies the given files into a workspace's knowledge store and registers them.
 *
 * Each path is processed independently. A file is rejected, without affecting
 * the others, when its extension is unsupported, it is missing or not a
 * regular file, it exceeds the size limit, it cannot be copied, or (for
 * `.docx`) no text can be extracted. Files created for a rejected entry are
 * removed again so the store contains no unregistered leftovers.
 *
 * @param params.workspaceId - Raw workspace id; sanitized before use (defaults to `'default'`).
 * @param params.filePaths - Paths of the files to import.
 * @returns `documents` holds the newly registered entries in input order;
 *   `rejected` holds every path that was skipped, with a user-facing reason.
 * @throws If the workspace directories cannot be created or the registry cannot be written.
 */
export async function importKnowledgeFiles(params: {
  workspaceId?: string;
  filePaths: string[];
}): Promise<{ documents: KnowledgeDocument[]; rejected: Array<{ filePath: string; reason: string }> }> {
  const workspaceId = sanitizeWorkspaceId(params.workspaceId);
  const workspaceDir = getWorkspaceDir(workspaceId);
  const filesDir = join(workspaceDir, 'files');
  const textDir = join(workspaceDir, 'texts');
  await mkdir(filesDir, { recursive: true });
  await mkdir(textDir, { recursive: true });

  const importedDocs: KnowledgeDocument[] = [];
  const rejected: Array<{ filePath: string; reason: string }> = [];

  for (const filePath of params.filePaths) {
    // The list may originate from an unvalidated JSON body; a non-string entry
    // must not abort the whole batch by making the path helpers throw.
    if (typeof filePath !== 'string') {
      rejected.push({ filePath: String(filePath), reason: '文件不存在或不可读取' });
      continue;
    }

    const mimeType = getTextMimeType(filePath);
    if (!mimeType) {
      rejected.push({ filePath, reason: '仅支持文本类知识文件' });
      continue;
    }

    const fileStat = await stat(filePath).catch(() => null);
    if (!fileStat || !fileStat.isFile()) {
      rejected.push({ filePath, reason: '文件不存在或不可读取' });
      continue;
    }
    if (fileStat.size > MAX_KNOWLEDGE_FILE_BYTES) {
      rejected.push({ filePath, reason: '文件超过 20MB 限制' });
      continue;
    }

    const id = crypto.randomUUID();
    const ext = extname(filePath).toLowerCase();
    const storedPath = join(filesDir, `${id}${ext}`);

    try {
      await copyFile(filePath, storedPath);
    } catch {
      // The file may have vanished or become unreadable since stat(); drop any
      // partial copy and keep going with the remaining files.
      await removeImportArtifact(storedPath);
      rejected.push({ filePath, reason: '文件不存在或不可读取' });
      continue;
    }

    let textPath: string | undefined;
    if (isDocx(filePath)) {
      try {
        const extractedText = await extractDocxText(storedPath);
        if (!extractedText) {
          await removeImportArtifact(storedPath);
          rejected.push({ filePath, reason: 'Word 文档未提取到可用文本' });
          continue;
        }
        textPath = join(textDir, `${id}.txt`);
        await writeFile(textPath, extractedText, 'utf8');
      } catch {
        // Extraction or the text write failed: remove the stored copy and a
        // possibly half-written text file.
        await removeImportArtifact(storedPath);
        if (textPath) await removeImportArtifact(textPath);
        rejected.push({ filePath, reason: 'Word 文档解析失败,请确认文件为 .docx 格式' });
        continue;
      }
    }

    const doc: KnowledgeDocument = {
      id,
      workspaceId,
      name: basename(filePath),
      mimeType,
      size: fileStat.size,
      storedPath,
      ...(textPath ? { textPath } : {}),
      originalPath: filePath,
      importedAt: Date.now(),
      status: 'stored',
    };
    importedDocs.push(doc);
  }

  if (importedDocs.length > 0) {
    // Read the registry as late as possible so that entries written by another
    // import while this batch was copying are less likely to be overwritten.
    const currentDocs = await readRegistry(workspaceId);
    await writeRegistry(workspaceId, [...importedDocs, ...currentDocs]);
  }

  return { documents: importedDocs, rejected };
}
|
||||
|
||||
/**
 * Cuts `text` to at most `limit` UTF-16 code units without ending on the first
 * half of a surrogate pair, which would leave an invalid lone surrogate.
 */
function truncateOnCodePointBoundary(text: string, limit: number): string {
  if (text.length <= limit) return text;
  const cut = text.slice(0, limit);
  const lastUnit = cut.charCodeAt(cut.length - 1);
  const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
  return endsOnHighSurrogate ? cut.slice(0, -1) : cut;
}

/**
 * Builds the prompt context for the selected knowledge documents of a workspace.
 *
 * Documents are taken in registry order. Each contributes at most
 * `MAX_CONTEXT_CHARS_PER_FILE` characters, and the total across documents is
 * capped at `MAX_CONTEXT_TOTAL_CHARS`. Once the total budget is used up, the
 * remaining documents are skipped and appear in neither `documents` nor `missing`.
 *
 * @param params.workspaceId - Raw workspace id; sanitized before use.
 * @param params.documentIds - Ids of the documents to include; non-string and blank entries are ignored.
 * @returns `context` is the assembled text (empty when nothing could be included);
 *   `documents` lists the entries that contributed; `missing` lists ids that are
 *   not in the registry or whose content is empty or unreadable.
 */
export async function buildKnowledgeContext(params: {
  workspaceId?: string;
  documentIds: string[];
}): Promise<{ context: string; documents: KnowledgeDocument[]; missing: string[] }> {
  const workspaceId = sanitizeWorkspaceId(params.workspaceId);
  const selectedIds = new Set(
    params.documentIds.filter((id) => typeof id === 'string' && id.trim() !== ''),
  );
  if (selectedIds.size === 0) {
    return { context: '', documents: [], missing: [] };
  }

  const registry = await readRegistry(workspaceId);
  const docs = registry.filter((doc) => selectedIds.has(doc.id));
  const foundIds = new Set(docs.map((doc) => doc.id));
  const missing = [...selectedIds].filter((id) => !foundIds.has(id));
  const sections: string[] = [];
  const usedDocs: KnowledgeDocument[] = [];
  let totalChars = 0;

  for (const doc of docs) {
    // Prefer the extracted text (set for .docx); other documents are read directly.
    const readablePath = doc.textPath || doc.storedPath;
    const raw = await readFile(readablePath, 'utf8').catch(() => '');
    const text = raw.trim();
    if (!text) {
      missing.push(doc.id);
      continue;
    }

    const remaining = MAX_CONTEXT_TOTAL_CHARS - totalChars;
    if (remaining <= 0) break;
    const content = truncateOnCodePointBoundary(text, Math.min(MAX_CONTEXT_CHARS_PER_FILE, remaining));
    // Only possible when a single code unit of budget was left and it would
    // have split a surrogate pair: the budget is effectively exhausted.
    if (!content) break;

    totalChars += content.length;
    usedDocs.push(doc);
    sections.push([
      `## ${doc.name}`,
      `类型:${doc.mimeType}`,
      content,
    ].join('\n'));
  }

  if (sections.length === 0) {
    return { context: '', documents: [], missing };
  }

  return {
    context: [
      '[知识库上下文]',
      '用户已选择在本轮对话中使用当前组织空间知识库。以下内容来自智念助手保存的本地备份文件;回答前请优先参考这些内容。',
      ...sections,
    ].join('\n\n'),
    documents: usedDocs,
    missing,
  };
}
|
||||
|
||||
/**
 * Dispatches the knowledge-base HTTP endpoints.
 *
 * - `GET /api/knowledge/files?workspaceId=…` lists the registered documents.
 * - `POST /api/knowledge/import-paths` imports the files named in the JSON body.
 * - `POST /api/knowledge/context` builds the prompt context for the given document ids.
 *
 * Failures inside the POST handlers are reported as a 500 JSON response.
 *
 * @param req - Incoming request.
 * @param res - Response to write to.
 * @param url - Parsed request URL.
 * @param _ctx - Host API context; unused by these routes.
 * @returns `true` when the request matched one of the routes and a response
 *   was sent, `false` when the caller should try other handlers.
 */
export async function handleKnowledgeRoutes(
  req: IncomingMessage,
  res: ServerResponse,
  url: URL,
  _ctx: HostApiContext,
): Promise<boolean> {
  const { pathname } = url;
  const { method } = req;

  if (method === 'GET' && pathname === '/api/knowledge/files') {
    const requestedWorkspace = url.searchParams.get('workspaceId') ?? undefined;
    const documents = await readRegistry(sanitizeWorkspaceId(requestedWorkspace));
    sendJson(res, 200, { documents });
    return true;
  }

  if (method === 'POST' && pathname === '/api/knowledge/import-paths') {
    try {
      const payload = await parseJsonBody<{ workspaceId?: string; filePaths?: string[] }>(req);
      const filePaths = Array.isArray(payload.filePaths) ? payload.filePaths : [];
      if (filePaths.length > 0) {
        const outcome = await importKnowledgeFiles({ workspaceId: payload.workspaceId, filePaths });
        sendJson(res, 200, { success: true, ...outcome });
      } else {
        sendJson(res, 400, { success: false, error: 'No files selected' });
      }
    } catch (error) {
      sendJson(res, 500, { success: false, error: String(error) });
    }
    return true;
  }

  if (method === 'POST' && pathname === '/api/knowledge/context') {
    try {
      const payload = await parseJsonBody<{ workspaceId?: string; documentIds?: string[] }>(req);
      const documentIds = Array.isArray(payload.documentIds) ? payload.documentIds : [];
      const outcome = await buildKnowledgeContext({ workspaceId: payload.workspaceId, documentIds });
      sendJson(res, 200, { success: true, ...outcome });
    } catch (error) {
      sendJson(res, 500, { success: false, error: String(error) });
    }
    return true;
  }

  return false;
}
|
||||
Reference in New Issue
Block a user