chore: stabilize Zhinian pilot delivery
This commit is contained in:
246
resources/skills/local/web-search/scripts/search-web.mjs
Normal file
246
resources/skills/local/web-search/scripts/search-web.mjs
Normal file
@@ -0,0 +1,246 @@
|
||||
#!/usr/bin/env node
|
||||
import { writeFile } from 'node:fs/promises';
|
||||
|
||||
// Tunables shared by the argument parser and the HTTP helper below.

/** Result count used when --count is absent or not a number. */
const DEFAULT_COUNT = 6;

/** Upper clamp applied to --count. */
const MAX_COUNT = 10;

/** Default number of milliseconds before fetchText aborts a request. */
const DEFAULT_TIMEOUT_MS = 9_000;

/** Text length used when --max-chars is absent or not a number. */
const DEFAULT_MAX_CHARS = 4_000;

/** Upper clamp applied to --max-chars. */
const MAX_CHARS = 10_000;

/** User-Agent header sent with every outgoing request. */
const USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123 Safari/537.36 ZhinianAssistant/0.1';
|
||||
|
||||
/**
 * Turn raw command-line tokens into the option object used by the script.
 *
 * Recognised flags consume the token that follows them as their value; every
 * other token is treated as part of the search query. `--help` / `-h` prints
 * the usage text and exits the process with status 0.
 *
 * @param {string[]} argv - Tokens after the script name (process.argv.slice(2)).
 * @returns {{query: string, count: number, fetchUrl: string, maxChars: number,
 *   output: string, language: string}} Parsed options. `count` is clamped to
 *   1..MAX_COUNT and `maxChars` to 500..MAX_CHARS; non-numeric values fall
 *   back to the defaults.
 */
function parseArgs(argv) {
  const args = {
    query: '',
    count: DEFAULT_COUNT,
    fetchUrl: '',
    maxChars: DEFAULT_MAX_CHARS,
    output: '',
    language: 'zh-CN',
  };
  const positional = [];

  let index = 0;
  // Advance to the token after the current flag; a missing value reads as ''.
  const takeValue = () => {
    index += 1;
    return argv[index] || '';
  };

  while (index < argv.length) {
    const arg = argv[index];
    switch (arg) {
      case '--count':
      case '-n':
        args.count = Number.parseInt(takeValue(), 10);
        break;
      case '--fetch-url':
        args.fetchUrl = takeValue();
        break;
      case '--max-chars':
        args.maxChars = Number.parseInt(takeValue(), 10);
        break;
      case '--output':
      case '-o':
        args.output = takeValue();
        break;
      case '--language':
      case '--lang':
        // An empty or missing value keeps the current language.
        args.language = takeValue() || args.language;
        break;
      case '--help':
      case '-h':
        printHelp();
        process.exit(0);
        break;
      default:
        // Anything unrecognised (including tokens starting with '-') is query text.
        positional.push(arg);
    }
    index += 1;
  }

  args.query = positional.join(' ').trim();
  // parseInt yields NaN for missing or non-numeric values; fall back to the defaults.
  args.count = Number.isFinite(args.count)
    ? Math.max(1, Math.min(MAX_COUNT, args.count))
    : DEFAULT_COUNT;
  args.maxChars = Number.isFinite(args.maxChars)
    ? Math.max(500, Math.min(MAX_CHARS, args.maxChars))
    : DEFAULT_MAX_CHARS;
  return args;
}
|
||||
|
||||
/**
 * Write the command-line usage text to stdout.
 *
 * The numeric ranges are interpolated from MAX_COUNT and MAX_CHARS. The option
 * list names every flag and alias that parseArgs accepts.
 */
function printHelp() {
  process.stdout.write(`Usage:
node search-web.mjs "<query>" [--count 6] [--output <file>]
node search-web.mjs --fetch-url "<url>" [--max-chars 4000] [--output <file>]

Options:
--count, -n Number of search results to return, 1-${MAX_COUNT}.
--fetch-url Fetch and summarize a single page URL.
--max-chars Maximum returned text length in fetch mode, 500-${MAX_CHARS}.
--output, -o Write JSON result to a file in addition to stdout.
--language, --lang Accept-Language value. Default: zh-CN.
--help, -h Show this help and exit.
`);
}
|
||||
|
||||
// Named character references understood by decodeHtml, keyed by the name
// between the ampersand and the semicolon. nbsp maps to a plain space.
const HTML_NAMED_ENTITIES = new Map([
  ['quot', '"'],
  ['apos', "'"],
  ['nbsp', ' '],
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
]);

/**
 * Decode HTML character references in a string.
 *
 * Handles the named references listed in HTML_NAMED_ENTITIES plus decimal and
 * hexadecimal numeric references. All references are replaced in one pass, so
 * text produced by one replacement is never decoded a second time (an escaped
 * ampersand followed by "lt;" stays a literal reference instead of becoming
 * an angle bracket). Unknown names and invalid code points are left untouched.
 *
 * @param {*} value - Text to decode; any falsy value is treated as ''.
 * @returns {string} The decoded text.
 */
function decodeHtml(value) {
  return String(value || '').replace(
    /&(#[xX][0-9a-fA-F]+|#[0-9]+|[A-Za-z]+);/g,
    (reference, body) => {
      if (body[0] !== '#') {
        return HTML_NAMED_ENTITIES.get(body) ?? reference;
      }
      const isHex = body[1] === 'x' || body[1] === 'X';
      const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      // Keep numeric no-break spaces consistent with the named form.
      if (codePoint === 0xa0) return ' ';
      const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
      if (codePoint === 0 || codePoint > 0x10ffff || isSurrogate) {
        return reference;
      }
      return String.fromCodePoint(codePoint);
    },
  );
}
|
||||
|
||||
/**
 * Reduce an HTML fragment to plain text.
 *
 * Markup is removed first and character references are decoded afterwards.
 * Decoding first would turn escaped angle brackets in the page text into
 * real tags, which the tag-stripping step would then delete.
 *
 * @param {*} value - HTML to convert; any falsy value is treated as ''.
 * @returns {string} Trimmed text with script/style/noscript content removed,
 *   line breaks inserted for <br> and for closing p/div/li/h1-h6/tr tags, and
 *   runs of spaces or tabs collapsed to one space.
 */
function stripTags(value) {
  const withoutMarkup = String(value || '')
    .replace(/<script[\s\S]*?<\/script>/gi, ' ')
    .replace(/<style[\s\S]*?<\/style>/gi, ' ')
    .replace(/<noscript[\s\S]*?<\/noscript>/gi, ' ')
    .replace(/<br\s*\/?>/gi, '\n')
    .replace(/<\/(p|div|li|h[1-6]|tr)>/gi, '\n')
    .replace(/<[^>]+>/g, ' ');

  return decodeHtml(withoutMarkup)
    .replace(/[ \t]+/g, ' ')
    // A newline followed by any whitespace (including more newlines) becomes one newline.
    .replace(/\n\s+/g, '\n')
    .replace(/\n{3,}/g, '\n\n')
    .trim();
}
|
||||
|
||||
/**
 * Report whether a value parses as an absolute http: or https: URL.
 *
 * @param {*} value - Candidate URL; anything the URL constructor rejects
 *   yields false.
 * @returns {boolean} True only for parseable URLs whose protocol is http or https.
 */
function isHttpUrl(value) {
  let parsed;
  try {
    parsed = new URL(value);
  } catch {
    // Not an absolute URL at all.
    return false;
  }
  return parsed.protocol === 'http:' || parsed.protocol === 'https:';
}
|
||||
|
||||
/**
 * Build the Bing HTML search URL for a query.
 *
 * @param {string} query - Search terms, placed in the `q` parameter.
 * @param {string} [language='zh-CN'] - Language tag or Accept-Language style
 *   list. Only the first tag is used (quality values are dropped); a value
 *   that does not look like a language tag falls back to 'zh-CN'. The chosen
 *   tag is sent as both `setlang` and `mkt`.
 * @returns {string} The absolute search URL.
 */
function buildBingSearchUrl(query, language = 'zh-CN') {
  // "en-US,en;q=0.9" -> "en-US"
  const primaryTag = String(language || '').split(',')[0].split(';')[0].trim();
  const market = /^[A-Za-z]{2,3}(?:-[A-Za-z0-9]{2,8})*$/.test(primaryTag)
    ? primaryTag
    : 'zh-CN';

  const url = new URL('https://www.bing.com/search');
  url.searchParams.set('q', query);
  url.searchParams.set('setlang', market);
  url.searchParams.set('mkt', market);
  return url.toString();
}
|
||||
|
||||
/**
 * GET a URL and return its body as text together with response metadata.
 *
 * Redirects are followed. The request, including the body download, is
 * aborted once `timeoutMs` elapses.
 *
 * @param {string} url - Absolute URL to request.
 * @param {string} [language] - Accept-Language header value; 'zh-CN' when falsy.
 * @param {number} [timeoutMs=DEFAULT_TIMEOUT_MS] - Abort deadline in milliseconds.
 * @returns {Promise<{text: string, contentType: string, finalUrl: string,
 *   status: number}>} Body text, the content-type header ('' when absent),
 *   the URL after redirects and the HTTP status.
 * @throws {Error} `HTTP <status> <statusText>` for non-2xx responses, a
 *   "Request timed out" error (original error attached as `cause`) when the
 *   deadline fires, or whatever fetch itself rejects with.
 */
async function fetchText(url, language, timeoutMs = DEFAULT_TIMEOUT_MS) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const response = await fetch(url, {
      redirect: 'follow',
      signal: controller.signal,
      headers: {
        'User-Agent': USER_AGENT,
        'Accept': 'text/html,application/xhtml+xml,application/xml,application/json;q=0.9,text/plain;q=0.8,*/*;q=0.7',
        'Accept-Language': language || 'zh-CN',
      },
    });

    // Check the status before reading: an error page body is never used.
    if (!response.ok) {
      // Best-effort release of the unread body; a failure here must not mask the HTTP error.
      await response.body?.cancel().catch(() => {});
      throw new Error(`HTTP ${response.status} ${response.statusText}`);
    }

    const contentType = response.headers.get('content-type') || '';
    const text = await response.text();
    return { text, contentType, finalUrl: response.url, status: response.status };
  } catch (error) {
    // Replace the generic abort error with a message that names the cause.
    if (controller.signal.aborted) {
      throw new Error(`Request timed out after ${timeoutMs} ms: ${url}`, { cause: error });
    }
    throw error;
  } finally {
    clearTimeout(timer);
  }
}
|
||||
|
||||
/**
 * Turn an href taken from Bing result markup into the destination URL.
 *
 * Links of the form `<bing host>/ck/a?...&u=<target>` (presumably Bing's
 * click-tracking redirect) are unwrapped: a target prefixed with `a1` is
 * base64-decoded, any other target is used verbatim. The host must be
 * bing.com itself or a subdomain of it; a look-alike such as
 * "evilbing.com" is not treated as a redirect.
 *
 * @param {string} rawUrl - href attribute value, possibly HTML-escaped.
 * @returns {string} The unwrapped target when it is an http(s) URL, otherwise
 *   the input URL when that is http(s), otherwise ''.
 */
function normalizeBingUrl(rawUrl) {
  const url = decodeHtml(rawUrl).trim();
  if (!url) return '';

  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return '';
  }

  const host = parsed.hostname;
  const isBingHost = host === 'bing.com' || host.endsWith('.bing.com');
  if (isBingHost && parsed.pathname === '/ck/a') {
    const target = parsed.searchParams.get('u');
    if (target) {
      const decoded = target.startsWith('a1')
        ? Buffer.from(target.slice(2), 'base64').toString('utf8')
        : target;
      if (isHttpUrl(decoded)) return decoded;
      // Undecodable target: fall through and keep the redirect link itself.
    }
  }

  return isHttpUrl(url) ? url : '';
}
|
||||
|
||||
// Return the leading <li>...</li> element of `segment`, honouring nested list
// items; if the element is never closed the whole segment is returned.
function extractLeadingListItem(segment) {
  const liTag = /<(\/?)li\b[^>]*>/gi;
  let depth = 0;
  let tag = liTag.exec(segment);
  while (tag !== null) {
    depth += tag[1] === '/' ? -1 : 1;
    if (depth <= 0) return segment.slice(0, liTag.lastIndex);
    tag = liTag.exec(segment);
  }
  return segment;
}

/**
 * Extract organic results from a Bing HTML results page.
 *
 * A result is an `<li>` whose class list contains `b_algo`. The link and
 * title come from the first anchor inside its `<h2>`, the snippet from its
 * first `<p>`. Results whose URL cannot be normalised, or whose URL was
 * already seen, are skipped.
 *
 * @param {string} html - Raw results page markup.
 * @param {number} count - Maximum number of results to return.
 * @returns {Array<{title: string, url: string, snippet: string,
 *   sourceName: string}>} Results in page order; `sourceName` is the URL's
 *   host name without a leading "www.".
 */
function parseBingResults(html, count) {
  const results = [];
  const seen = new Set();

  // Match the opening tag only, tolerating extra classes and attributes.
  const resultOpen = /<li\b[^>]*\bclass="(?:[^"]*\s)?b_algo(?:\s[^"]*)?"[^>]*>/gi;
  const starts = [...html.matchAll(resultOpen)].map((match) => match.index);

  for (let i = 0; i < starts.length; i += 1) {
    if (results.length >= count) break;

    // Bound each result by the next one so an unclosed <li> cannot swallow it.
    const block = extractLeadingListItem(html.slice(starts[i], starts[i + 1]));

    const linkMatch = block.match(/<h2[^>]*>[\s\S]*?<a[^>]+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>[\s\S]*?<\/h2>/i);
    if (!linkMatch) continue;

    const url = normalizeBingUrl(linkMatch[1]);
    if (!url || seen.has(url)) continue;
    seen.add(url);

    const title = stripTags(linkMatch[2]);
    // \b keeps <pre>, <path>, <picture> and similar tags from matching as <p>.
    const snippetMatch = block.match(/<p\b[^>]*>([\s\S]*?)<\/p>/i);
    const snippet = snippetMatch ? stripTags(snippetMatch[1]) : '';

    let sourceName = '';
    try {
      sourceName = new URL(url).hostname.replace(/^www\./, '');
    } catch {
      sourceName = '';
    }

    results.push({ title, url, snippet, sourceName });
  }

  return results;
}
|
||||
|
||||
/**
 * Extract the title and a length-limited plain-text rendering of an HTML page.
 *
 * The document is converted to text once; both the returned text and the
 * `truncated` flag are derived from that single conversion.
 *
 * @param {string} html - Page markup.
 * @param {number} maxChars - Maximum length of the returned text.
 * @returns {{title: string, text: string, truncated: boolean}} `title` is the
 *   text of the first <title> element ('' when absent); `truncated` is true
 *   when the full text was longer than `maxChars`.
 */
function summarizeHtml(html, maxChars) {
  const titleMatch = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
  const title = titleMatch ? stripTags(titleMatch[1]) : '';
  const fullText = stripTags(html);
  return {
    title,
    text: fullText.slice(0, maxChars),
    truncated: fullText.length > maxChars,
  };
}
|
||||
|
||||
/**
 * Run a web search and return the parsed results.
 *
 * @param {{query: string, language: string, count: number}} args - Parsed
 *   options; `language` is used for the Accept-Language header and is also
 *   handed to buildBingSearchUrl (a builder that only takes the query
 *   ignores the extra argument).
 * @returns {Promise<{status: 'ok', mode: 'search', query: string,
 *   provider: 'bing-html', fetchedAt: string, results: Array}>} Result
 *   envelope with an ISO-8601 `fetchedAt` timestamp.
 * @throws {Error} When the query is empty, or when the fetch fails.
 */
async function search(args) {
  const { query, language, count } = args;
  if (!query) {
    throw new Error('Search query is required.');
  }

  const url = buildBingSearchUrl(query, language);
  const fetched = await fetchText(url, language);

  return {
    status: 'ok',
    mode: 'search',
    query,
    provider: 'bing-html',
    fetchedAt: new Date().toISOString(),
    results: parseBingResults(fetched.text, count),
  };
}
|
||||
|
||||
// Pretty-print a JSON payload; a body that does not parse is returned as-is
// so a mislabelled response still yields usable text.
function formatJsonBody(raw) {
  try {
    return JSON.stringify(JSON.parse(raw), null, 2);
  } catch (error) {
    if (error instanceof SyntaxError) return raw;
    throw error;
  }
}

/**
 * Fetch a single page and return a length-limited text rendering of it.
 *
 * The body is chosen by content type: JSON is pretty-printed, plain text is
 * used verbatim, and everything else goes through summarizeHtml. `truncated`
 * describes the text that is actually returned, i.e. whether that rendering
 * was longer than `maxChars` before it was cut.
 *
 * @param {{fetchUrl: string, language: string, maxChars: number}} args -
 *   Parsed options.
 * @returns {Promise<{status: 'ok', mode: 'fetch', url: string,
 *   finalUrl: string, httpStatus: number, contentType: string, title: string,
 *   fetchedAt: string, text: string, truncated: boolean}>} Result envelope;
 *   `title` is only filled for text/html responses.
 * @throws {Error} When `fetchUrl` is not an http(s) URL, or when the fetch fails.
 */
async function fetchPage(args) {
  if (!isHttpUrl(args.fetchUrl)) {
    throw new Error('A valid http(s) --fetch-url is required.');
  }

  const fetched = await fetchText(args.fetchUrl, args.language);
  const lowerType = fetched.contentType.toLowerCase();

  let title = '';
  let text;
  let truncated;
  if (lowerType.includes('application/json')) {
    const pretty = formatJsonBody(fetched.text);
    text = pretty.slice(0, args.maxChars);
    truncated = pretty.length > args.maxChars;
  } else if (lowerType.includes('text/plain')) {
    text = fetched.text.slice(0, args.maxChars);
    truncated = fetched.text.length > args.maxChars;
  } else {
    // Summarise once and reuse the result for text, title and truncation.
    const summary = summarizeHtml(fetched.text, args.maxChars);
    text = summary.text;
    truncated = summary.truncated;
    if (lowerType.includes('text/html')) {
      title = summary.title || '';
    }
  }

  return {
    status: 'ok',
    mode: 'fetch',
    url: args.fetchUrl,
    finalUrl: fetched.finalUrl,
    httpStatus: fetched.status,
    contentType: fetched.contentType,
    title,
    fetchedAt: new Date().toISOString(),
    text,
    truncated,
  };
}
|
||||
|
||||
/**
 * Script entry point: parse the command line, run fetch mode when
 * --fetch-url was given and search mode otherwise, then emit the result as
 * pretty-printed JSON followed by a newline.
 *
 * The JSON is always written to stdout; when --output is set it is written
 * to that file first.
 *
 * @returns {Promise<void>} Rejects with whatever the selected mode or the
 *   file write throws.
 */
async function main() {
  const args = parseArgs(process.argv.slice(2));

  let result;
  if (args.fetchUrl) {
    result = await fetchPage(args);
  } else {
    result = await search(args);
  }

  const json = `${JSON.stringify(result, null, 2)}\n`;
  if (args.output) {
    await writeFile(args.output, json, 'utf8');
  }
  process.stdout.write(json);
}
|
||||
|
||||
// Run the script. Failures are reported on stdout as a JSON object with
// status "error" (the same stream successful results use) and the process
// exit code is set to 1.
main().catch((error) => {
  const message = error instanceof Error ? error.message : String(error);
  // Some errors carry the real reason only on `cause` (Node's fetch rejects
  // with a bare "fetch failed"), so append it when it adds information.
  const causeMessage = error instanceof Error && error.cause instanceof Error
    ? error.cause.message
    : '';
  const detail = causeMessage && !message.includes(causeMessage)
    ? `${message}: ${causeMessage}`
    : message;

  const json = `${JSON.stringify({
    status: 'error',
    error: detail,
    fetchedAt: new Date().toISOString(),
  }, null, 2)}\n`;
  process.stdout.write(json);
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user