chore: stabilize Zhinian pilot delivery

This commit is contained in:
inman
2026-05-12 19:44:44 +08:00
parent 45389855e1
commit 20b5aff4ad
174 changed files with 41428 additions and 784 deletions

View File

@@ -0,0 +1,46 @@
---
name: web-search
description: Search the public web and fetch short page summaries through Zhinian's guarded local script. Use when the user asks for current information, news, weather, facts that may have changed, source links, or online research. Prefer image-search for image-only requests.
---
# web-search
Use this skill when the user needs current public web information.
This skill is independent from OpenClaw's built-in `web_search` and `web_fetch` tools. Do not change Tavily, Brave, DuckDuckGo, SearXNG, or OpenClaw web settings to use this skill.
## Standard Workflow
1. Search for web results:
```bash
node ~/.openclaw/skills/web-search/scripts/search-web.mjs "<query>" --count 6
```
2. If one result needs more detail, fetch that exact page with a short character limit (`--max-chars` accepts 500-10000; the default is 4000):
```bash
node ~/.openclaw/skills/web-search/scripts/search-web.mjs --fetch-url "<url>" --max-chars 4000
```
3. Use the returned JSON:
- `results[].title` is the page title.
- `results[].url` is the source page.
- `results[].snippet` is a short search preview.
- `text` exists only in `--fetch-url` mode.
## Rules
- Keep queries focused and short.
- Prefer 2-5 sources for factual/current answers.
- If search or fetch fails (the script prints JSON with `status: "error"` and exits with a non-zero code), say the online lookup failed and answer from available context only.
- Do not retry indefinitely. This skill is intentionally guarded to avoid blocking the conversation.
- For image requests, use the `image-search` skill instead.
- Include source links in user-facing answers when current information was used.
## Examples
```bash
node ~/.openclaw/skills/web-search/scripts/search-web.mjs "贵阳 今天 天气" --count 5
node ~/.openclaw/skills/web-search/scripts/search-web.mjs "2026 企业微信 最新收费政策" --count 6
node ~/.openclaw/skills/web-search/scripts/search-web.mjs --fetch-url "https://example.com/news/article" --max-chars 3000
```

View File

@@ -0,0 +1,246 @@
#!/usr/bin/env node
import { writeFile } from 'node:fs/promises';
// Number of search results used when --count is omitted or is not a valid integer.
const DEFAULT_COUNT = 6;
// Upper bound that parseArgs clamps --count to.
const MAX_COUNT = 10;
// Default per-request timeout for fetchText, in milliseconds.
const DEFAULT_TIMEOUT_MS = 9_000;
// Fetch-mode text length used when --max-chars is omitted or is not a valid integer.
const DEFAULT_MAX_CHARS = 4_000;
// Upper bound that parseArgs clamps --max-chars to.
const MAX_CHARS = 10_000;
// Sent as the User-Agent header on every request issued by fetchText.
const USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123 Safari/537.36 ZhinianAssistant/0.1';
/**
 * Convert raw command-line tokens into a normalized options object.
 *
 * Tokens that are not recognised flags (or flag values) are joined with
 * spaces to form the search query. `--help` / `-h` prints the usage text
 * and terminates the process with exit code 0.
 *
 * @param {string[]} argv - Command-line tokens (without node/script path).
 * @returns {{query: string, count: number, fetchUrl: string, maxChars: number, output: string, language: string}}
 *   `count` is clamped to 1..MAX_COUNT and `maxChars` to 500..MAX_CHARS;
 *   values that do not parse as integers fall back to the defaults.
 */
function parseArgs(argv) {
  const args = {
    query: '',
    count: DEFAULT_COUNT,
    fetchUrl: '',
    maxChars: DEFAULT_MAX_CHARS,
    output: '',
    language: 'zh-CN',
  };
  const queryWords = [];
  let cursor = 0;

  // Advance to the token after the current flag; '' when the flag is last.
  const takeValue = () => {
    cursor += 1;
    return argv[cursor] || '';
  };

  // Keep finite numbers inside [low, high]; anything else becomes `fallback`.
  const clamp = (value, low, high, fallback) => (
    Number.isFinite(value) ? Math.max(low, Math.min(high, value)) : fallback
  );

  while (cursor < argv.length) {
    const token = argv[cursor];
    switch (token) {
      case '--count':
      case '-n':
        args.count = Number.parseInt(takeValue(), 10);
        break;
      case '--fetch-url':
        args.fetchUrl = takeValue();
        break;
      case '--max-chars':
        args.maxChars = Number.parseInt(takeValue(), 10);
        break;
      case '--output':
      case '-o':
        args.output = takeValue();
        break;
      case '--language':
      case '--lang':
        // An empty/missing value keeps the current language.
        args.language = takeValue() || args.language;
        break;
      case '--help':
      case '-h':
        printHelp();
        process.exit(0);
        break;
      default:
        queryWords.push(token);
    }
    cursor += 1;
  }

  args.query = queryWords.join(' ').trim();
  args.count = clamp(args.count, 1, MAX_COUNT, DEFAULT_COUNT);
  args.maxChars = clamp(args.maxChars, 500, MAX_CHARS, DEFAULT_MAX_CHARS);
  return args;
}
/**
 * Write the command-line usage text to stdout.
 * The numeric limits shown come from MAX_COUNT and MAX_CHARS.
 */
function printHelp() {
  const helpLines = [
    'Usage:',
    'node search-web.mjs "<query>" [--count 6] [--output <file>]',
    'node search-web.mjs --fetch-url "<url>" [--max-chars 4000] [--output <file>]',
    'Options:',
    `--count, -n Number of search results to return, 1-${MAX_COUNT}.`,
    '--fetch-url Fetch and summarize a single page URL.',
    `--max-chars Maximum returned text length in fetch mode, 500-${MAX_CHARS}.`,
    '--output, -o Write JSON result to a file in addition to stdout.',
    '--language Accept-Language value. Default: zh-CN.',
    '', // yields the trailing newline after the last option line
  ];
  process.stdout.write(helpLines.join('\n'));
}
/**
 * Decode the HTML character references this script cares about.
 *
 * Handles the named entities `&quot;`, `&apos;`, `&nbsp;`, `&amp;`, `&lt;`,
 * `&gt;` (case-sensitive) plus decimal (`&#39;`) and hexadecimal (`&#x27;`)
 * numeric references. Unknown or out-of-range references are left untouched.
 *
 * Decoding happens in a single pass so the result of one replacement is never
 * decoded again: `&amp;lt;` becomes the literal text `&lt;`, not `<`.
 *
 * @param {unknown} value - Text to decode; falsy values are treated as ''.
 * @returns {string} The decoded text.
 */
function decodeHtml(value) {
  const namedEntities = new Map([
    ['quot', '"'],
    ['apos', "'"],
    ['nbsp', ' '],
    ['amp', '&'],
    ['lt', '<'],
    ['gt', '>'],
  ]);

  return String(value || '').replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (entity, body) => {
    if (body[0] !== '#') {
      return namedEntities.get(body) ?? entity;
    }

    const isHex = body[1] === 'x' || body[1] === 'X';
    const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
    const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
    if (!Number.isFinite(codePoint) || codePoint === 0 || codePoint > 0x10ffff || isSurrogate) {
      return entity; // not a usable code point; keep the source text
    }
    // Numeric no-break space is mapped to a plain space, matching `&nbsp;`.
    return codePoint === 0xa0 ? ' ' : String.fromCodePoint(codePoint);
  });
}
/**
 * Reduce an HTML fragment to readable plain text.
 *
 * Script/style/noscript elements are dropped, `<br>` and common block-closing
 * tags become newlines, remaining tags become spaces, and whitespace is
 * collapsed.
 *
 * Entities are decoded only AFTER the markup has been removed. Decoding first
 * would turn escaped text such as `1 &lt; 2 and 3 &gt; 2` into something that
 * looks like a tag and silently delete it.
 *
 * @param {unknown} value - HTML source; falsy values are treated as ''.
 * @returns {string} Trimmed plain text.
 */
function stripTags(value) {
  const withoutMarkup = String(value || '')
    .replace(/<script[\s\S]*?<\/script>/gi, ' ')
    .replace(/<style[\s\S]*?<\/style>/gi, ' ')
    .replace(/<noscript[\s\S]*?<\/noscript>/gi, ' ')
    .replace(/<br\s*\/?>/gi, '\n')
    .replace(/<\/(p|div|li|h[1-6]|tr)>/gi, '\n')
    .replace(/<[^>]+>/g, ' ');

  // Decode before whitespace normalisation so `&nbsp;` collapses like a space.
  return decodeHtml(withoutMarkup)
    .replace(/[ \t]+/g, ' ')
    .replace(/\n\s+/g, '\n')
    .replace(/\n{3,}/g, '\n\n')
    .trim();
}
/**
 * Tell whether `value` parses as an absolute URL using http or https.
 *
 * @param {string} value - Candidate URL.
 * @returns {boolean} true only for parseable `http:` / `https:` URLs.
 */
function isHttpUrl(value) {
  let candidate;
  try {
    candidate = new URL(value);
  } catch {
    // Unparseable (relative, empty, malformed) input is simply not a web URL.
    return false;
  }
  return ['http:', 'https:'].includes(candidate.protocol);
}
/**
 * Build the Bing HTML search URL for a query.
 * The `setlang` and `mkt` parameters are fixed to zh-CN.
 *
 * @param {string} query - Search terms.
 * @returns {string} Fully encoded search URL.
 */
function buildBingSearchUrl(query) {
  const endpoint = new URL('https://www.bing.com/search');
  const parameters = [
    ['q', query],
    ['setlang', 'zh-CN'],
    ['mkt', 'zh-CN'],
  ];
  for (const [key, val] of parameters) {
    endpoint.searchParams.set(key, val);
  }
  return endpoint.href;
}
/**
 * GET a URL and return its body as text, bounded by a timeout.
 *
 * The HTTP status is checked before the body is read, so error responses are
 * rejected without downloading their payload. When the timeout fires, the
 * rejection carries an explicit "timed out" message (the underlying abort
 * error is attached as `cause`) instead of a generic abort message.
 *
 * @param {string} url - Absolute URL to request.
 * @param {string} language - Accept-Language header value; 'zh-CN' when empty.
 * @param {number} [timeoutMs=DEFAULT_TIMEOUT_MS] - Abort the request after this many milliseconds.
 * @returns {Promise<{text: string, contentType: string, finalUrl: string, status: number}>}
 *   Body text, Content-Type header ('' when absent), post-redirect URL and status code.
 * @throws {Error} `HTTP <status> <statusText>` for non-2xx responses,
 *   `Request timed out after <n>ms` on timeout, or the underlying network error.
 */
async function fetchText(url, language, timeoutMs = DEFAULT_TIMEOUT_MS) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const response = await fetch(url, {
      redirect: 'follow',
      signal: controller.signal,
      headers: {
        'User-Agent': USER_AGENT,
        'Accept': 'text/html,application/xhtml+xml,application/xml,application/json;q=0.9,text/plain;q=0.8,*/*;q=0.7',
        'Accept-Language': language || 'zh-CN',
      },
    });
    if (!response.ok) {
      // Release the connection without downloading an error page we will not use.
      await response.body?.cancel().catch(() => {});
      throw new Error(`HTTP ${response.status} ${response.statusText}`);
    }
    const contentType = response.headers.get('content-type') || '';
    const text = await response.text();
    return { text, contentType, finalUrl: response.url, status: response.status };
  } catch (error) {
    // Only our timer aborts this controller, so an aborted signal means timeout.
    if (controller.signal.aborted) {
      throw new Error(`Request timed out after ${timeoutMs}ms`, { cause: error });
    }
    throw error;
  } finally {
    clearTimeout(timer);
  }
}
/**
 * Turn an href taken from a Bing result into the real destination URL.
 *
 * Bing click-tracking links (`/ck/a` on bing.com) carry the target in the `u`
 * query parameter, either directly or as `a1` followed by base64 text. Such
 * links are unwrapped; if the unwrapped value is not an http(s) URL the
 * tracking link itself is returned. Any other http(s) URL is returned as is.
 *
 * Only `bing.com` and its subdomains are treated as Bing; a plain suffix
 * check would also accept look-alike hosts such as `notbing.com`.
 *
 * @param {string} rawUrl - href attribute value, possibly HTML-escaped.
 * @returns {string} Destination URL, or '' when the input is empty,
 *   unparseable, or not http(s).
 */
function normalizeBingUrl(rawUrl) {
  const url = decodeHtml(rawUrl).trim();
  if (!url) return '';

  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return ''; // relative or malformed href
  }

  const host = parsed.hostname;
  const isBingHost = host === 'bing.com' || host.endsWith('.bing.com');
  if (isBingHost && parsed.pathname === '/ck/a') {
    const target = parsed.searchParams.get('u');
    if (target) {
      // Node's 'base64' decoder also accepts the URL-safe alphabet.
      const decoded = target.startsWith('a1')
        ? Buffer.from(target.slice(2), 'base64').toString('utf8')
        : target;
      return isHttpUrl(decoded) ? decoded : url;
    }
  }
  return isHttpUrl(url) ? url : '';
}
/**
 * Extract organic results from a Bing search results page.
 *
 * Each `<li class="b_algo">` block contributes at most one result, taken from
 * the link inside its `<h2>`; the first `<p>` in the block is the snippet.
 * Blocks without a usable link and duplicate URLs are skipped.
 *
 * @param {string} html - Raw HTML of the results page.
 * @param {number} count - Maximum number of results to return.
 * @returns {Array<{title: string, url: string, snippet: string, sourceName: string}>}
 *   Results in page order; `sourceName` is the hostname without a leading `www.`.
 */
function parseBingResults(html, count) {
  const hits = [];
  const visited = new Set();

  // Hostname without the "www." prefix, or '' when the URL cannot be parsed.
  const hostOf = (link) => {
    try {
      return new URL(link).hostname.replace(/^www\./, '');
    } catch {
      return '';
    }
  };

  for (const entry of html.match(/<li class="b_algo"[\s\S]*?<\/li>/gi) ?? []) {
    if (hits.length >= count) break;

    const heading = entry.match(/<h2[^>]*>[\s\S]*?<a[^>]+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>[\s\S]*?<\/h2>/i);
    if (heading === null) continue;
    const [, rawHref, rawTitle] = heading;

    const url = normalizeBingUrl(rawHref);
    if (!url || visited.has(url)) continue;
    visited.add(url);

    const title = stripTags(rawTitle);
    const paragraph = entry.match(/<p[^>]*>([\s\S]*?)<\/p>/i);
    const snippet = paragraph === null ? '' : stripTags(paragraph[1]);

    hits.push({ title, url, snippet, sourceName: hostOf(url) });
  }
  return hits;
}
/**
 * Produce a plain-text summary of an HTML document.
 *
 * The document is converted to text once and that result is reused for both
 * the returned excerpt and the truncation flag.
 *
 * @param {string} html - Raw HTML source.
 * @param {number} maxChars - Maximum length of the returned text.
 * @returns {{title: string, text: string, truncated: boolean}}
 *   `title` is the stripped `<title>` content ('' when absent), `text` is the
 *   first `maxChars` characters of the stripped document, and `truncated`
 *   tells whether the stripped document was longer than `maxChars`.
 */
function summarizeHtml(html, maxChars) {
  const titleMatch = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
  const title = titleMatch ? stripTags(titleMatch[1]) : '';
  const fullText = stripTags(html);
  return {
    title,
    text: fullText.slice(0, maxChars),
    truncated: fullText.length > maxChars,
  };
}
/**
 * Run a web search and package the parsed results.
 *
 * @param {{query: string, language: string, count: number}} args - Parsed CLI options.
 * @returns {Promise<{status: string, mode: string, query: string, provider: string, fetchedAt: string, results: Array}>}
 *   Search payload; `fetchedAt` is an ISO-8601 timestamp.
 * @throws {Error} When the query is empty, or when the page fetch fails.
 */
async function search(args) {
  const query = args.query;
  if (!query) {
    throw new Error('Search query is required.');
  }

  const page = await fetchText(buildBingSearchUrl(query), args.language);
  const fetchedAt = new Date().toISOString();
  const results = parseBingResults(page.text, args.count);

  return {
    status: 'ok',
    mode: 'search',
    query,
    provider: 'bing-html',
    fetchedAt,
    results,
  };
}
/**
 * Fetch a single page and return a bounded text rendition of it.
 *
 * - `application/json` bodies are pretty-printed; if the body is not valid
 *   JSON despite the header, the raw text is returned instead of failing.
 * - `text/plain` bodies are returned as is.
 * - Everything else is treated as markup and summarized with summarizeHtml
 *   (run once), which also supplies the title.
 *
 * `truncated` describes the returned `text`: it is true only when the
 * rendition that was sliced was longer than `args.maxChars`.
 *
 * @param {{fetchUrl: string, language: string, maxChars: number}} args - Parsed CLI options.
 * @returns {Promise<{status: string, mode: string, url: string, finalUrl: string, httpStatus: number, contentType: string, title: string, fetchedAt: string, text: string, truncated: boolean}>}
 * @throws {Error} When `args.fetchUrl` is not an http(s) URL, or when the fetch fails.
 */
async function fetchPage(args) {
  if (!isHttpUrl(args.fetchUrl)) {
    throw new Error('A valid http(s) --fetch-url is required.');
  }
  const fetched = await fetchText(args.fetchUrl, args.language);
  const lowerType = fetched.contentType.toLowerCase();

  let title = '';
  let text;
  let truncated;
  if (lowerType.includes('application/json')) {
    let rendered = fetched.text;
    try {
      rendered = JSON.stringify(JSON.parse(fetched.text), null, 2);
    } catch {
      // Mislabelled or malformed JSON: best effort, keep the raw body.
    }
    text = rendered.slice(0, args.maxChars);
    truncated = rendered.length > args.maxChars;
  } else if (lowerType.includes('text/plain')) {
    text = fetched.text.slice(0, args.maxChars);
    truncated = fetched.text.length > args.maxChars;
  } else {
    ({ title, text, truncated } = summarizeHtml(fetched.text, args.maxChars));
  }

  return {
    status: 'ok',
    mode: 'fetch',
    url: args.fetchUrl,
    finalUrl: fetched.finalUrl,
    httpStatus: fetched.status,
    contentType: fetched.contentType,
    title: title || '',
    fetchedAt: new Date().toISOString(),
    text,
    truncated,
  };
}
/**
 * CLI entry point: parse the arguments, run fetch mode when a fetch URL was
 * given (search mode otherwise), optionally save the JSON to the --output
 * file, and print the JSON to stdout.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = parseArgs(process.argv.slice(2));
  const runMode = args.fetchUrl ? fetchPage : search;
  const result = await runMode(args);

  const payload = JSON.stringify(result, null, 2);
  const json = `${payload}\n`;

  // The file copy is written first; stdout always receives the same bytes.
  if (args.output) {
    await writeFile(args.output, json, 'utf8');
  }
  process.stdout.write(json);
}
// Run the CLI. Any failure is reported as a JSON error object on stdout
// (same channel as successful results) and the exit code is set to 1.
main().catch((error) => {
  const message = error instanceof Error ? error.message : String(error);
  const failure = {
    status: 'error',
    error: message,
    fetchedAt: new Date().toISOString(),
  };
  process.stdout.write(`${JSON.stringify(failure, null, 2)}\n`);
  process.exitCode = 1;
});