#!/usr/bin/env node
/**
 * Small CLI that either runs a Bing HTML search or fetches a single page and
 * prints a JSON summary to stdout (optionally mirroring it to a file).
 *
 * Every outcome, including failures, is reported as one JSON document on
 * stdout; failures additionally set a non-zero exit code.
 */
import { writeFile } from 'node:fs/promises';

const DEFAULT_COUNT = 6;
const MAX_COUNT = 10;
const DEFAULT_TIMEOUT_MS = 9_000;
const DEFAULT_MAX_CHARS = 4_000;
const MAX_CHARS = 10_000;
const USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123 Safari/537.36 ZhinianAssistant/0.1';

/** Named character references understood by {@link decodeHtml}. */
const NAMED_ENTITIES = new Map([
  ['quot', '"'],
  ['apos', "'"],
  ['nbsp', ' '],
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
]);

/**
 * Parse command-line arguments.
 *
 * Unrecognized tokens are treated as positional and joined with spaces to form
 * the search query. Numeric options that fail to parse fall back to their
 * defaults; parsed values are clamped to the supported range.
 *
 * @param {string[]} argv - Arguments without the node binary and script path.
 * @returns {{query: string, count: number, fetchUrl: string, maxChars: number, output: string, language: string}}
 */
function parseArgs(argv) {
  const args = {
    query: '',
    count: DEFAULT_COUNT,
    fetchUrl: '',
    maxChars: DEFAULT_MAX_CHARS,
    output: '',
    language: 'zh-CN',
  };
  const positional = [];

  for (let index = 0; index < argv.length; index += 1) {
    const arg = argv[index];
    if (arg === '--count' || arg === '-n') {
      args.count = Number.parseInt(argv[++index] || '', 10);
    } else if (arg === '--fetch-url') {
      args.fetchUrl = argv[++index] || '';
    } else if (arg === '--max-chars') {
      args.maxChars = Number.parseInt(argv[++index] || '', 10);
    } else if (arg === '--output' || arg === '-o') {
      args.output = argv[++index] || '';
    } else if (arg === '--language' || arg === '--lang') {
      args.language = argv[++index] || args.language;
    } else if (arg === '--help' || arg === '-h') {
      printHelp();
      process.exit(0);
    } else {
      positional.push(arg);
    }
  }

  args.query = positional.join(' ').trim();
  // parseInt yields NaN for a missing or non-numeric value; use the default then.
  args.count = Number.isFinite(args.count)
    ? Math.max(1, Math.min(MAX_COUNT, args.count))
    : DEFAULT_COUNT;
  args.maxChars = Number.isFinite(args.maxChars)
    ? Math.max(500, Math.min(MAX_CHARS, args.maxChars))
    : DEFAULT_MAX_CHARS;
  return args;
}

/** Write the usage text to stdout. */
function printHelp() {
  process.stdout.write(`Usage:
  node search-web.mjs "<query>" [--count 6] [--output <file>]
  node search-web.mjs --fetch-url "<url>" [--max-chars 4000] [--output <file>]

Options:
  --count, -n        Number of search results to return, 1-${MAX_COUNT}.
  --fetch-url        Fetch and summarize a single page URL.
  --max-chars        Maximum returned text length in fetch mode, 500-${MAX_CHARS}.
  --output, -o       Write JSON result to a file in addition to stdout.
  --language, --lang Accept-Language value. Default: zh-CN.
  --help, -h         Show this help.
`);
}

/**
 * Decode the common named HTML entities plus decimal/hex numeric references.
 *
 * Decoding happens in a single pass, so already-escaped text such as
 * `&amp;lt;` becomes `&lt;` rather than being decoded twice into `<`.
 * Unknown or invalid references are left untouched.
 *
 * @param {unknown} value - Text to decode; falsy values yield ''.
 * @returns {string}
 */
function decodeHtml(value) {
  return String(value || '').replace(
    /&(#[xX][0-9a-fA-F]+|#\d+|[a-zA-Z][a-zA-Z0-9]*);/g,
    (match, body) => {
      if (body[0] !== '#') {
        return NAMED_ENTITIES.get(body) ?? match;
      }
      const isHex = body[1] === 'x' || body[1] === 'X';
      const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
      // String.fromCodePoint throws on out-of-range input, so validate first.
      if (!Number.isInteger(codePoint) || codePoint <= 0 || codePoint > 0x10ffff || isSurrogate) {
        return match;
      }
      return String.fromCodePoint(codePoint);
    },
  );
}

/**
 * Reduce an HTML fragment to readable plain text.
 *
 * Markup is removed BEFORE entities are decoded; otherwise escaped text such
 * as `a &lt; b &gt; c` would be decoded into something that looks like a tag
 * and then be deleted.
 *
 * @param {unknown} value - HTML fragment; falsy values yield ''.
 * @returns {string} Text with block-level closers turned into newlines and
 *   runs of whitespace collapsed.
 */
function stripTags(value) {
  const withoutMarkup = String(value || '')
    .replace(/<script\b[\s\S]*?<\/script>/gi, ' ')
    .replace(/<style\b[\s\S]*?<\/style>/gi, ' ')
    .replace(/<noscript\b[\s\S]*?<\/noscript>/gi, ' ')
    .replace(/<!--[\s\S]*?-->/g, ' ')
    .replace(/<br\s*\/?>/gi, '\n')
    .replace(/<\/(p|div|li|h[1-6]|tr)>/gi, '\n')
    .replace(/<[^>]+>/g, ' ');

  return decodeHtml(withoutMarkup)
    .replace(/[ \t]+/g, ' ')
    .replace(/ ?\n\s*/g, '\n')
    .trim();
}

/**
 * @param {string} value - Candidate URL.
 * @returns {boolean} True when `value` parses as an absolute http(s) URL.
 */
function isHttpUrl(value) {
  try {
    const url = new URL(value);
    return url.protocol === 'http:' || url.protocol === 'https:';
  } catch {
    return false;
  }
}

/**
 * Build the Bing search URL for a query.
 *
 * @param {string} query - Search terms.
 * @returns {string} Absolute URL with the query, UI language and market set.
 */
function buildBingSearchUrl(query) {
  const url = new URL('https://www.bing.com/search');
  url.searchParams.set('q', query);
  url.searchParams.set('setlang', 'zh-CN');
  url.searchParams.set('mkt', 'zh-CN');
  return url.toString();
}

/**
 * Fetch a URL as text with a hard timeout covering both headers and body.
 *
 * @param {string} url - Absolute URL to request.
 * @param {string} language - Accept-Language header value; 'zh-CN' if empty.
 * @param {number} [timeoutMs=DEFAULT_TIMEOUT_MS] - Abort the request after this many ms.
 * @returns {Promise<{text: string, contentType: string, finalUrl: string, status: number}>}
 * @throws {Error} On a non-2xx status, a timeout, or a network failure.
 */
async function fetchText(url, language, timeoutMs = DEFAULT_TIMEOUT_MS) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const response = await fetch(url, {
      redirect: 'follow',
      signal: controller.signal,
      headers: {
        'User-Agent': USER_AGENT,
        'Accept': 'text/html,application/xhtml+xml,application/xml,application/json;q=0.9,text/plain;q=0.8,*/*;q=0.7',
        'Accept-Language': language || 'zh-CN',
      },
    });
    if (!response.ok) {
      // The error body is never used; release the connection instead of
      // downloading it. Cleanup is best-effort, the HTTP error is what matters.
      await response.body?.cancel().catch(() => {});
      throw new Error(`HTTP ${response.status} ${response.statusText}`);
    }
    const contentType = response.headers.get('content-type') || '';
    const text = await response.text();
    return { text, contentType, finalUrl: response.url, status: response.status };
  } catch (error) {
    // Only our own timer aborts the controller, so an aborted signal means a
    // timeout; report it as such instead of a generic abort message.
    if (controller.signal.aborted) {
      throw new Error(`Request timed out after ${timeoutMs} ms: ${url}`, { cause: error });
    }
    throw error;
  } finally {
    clearTimeout(timer);
  }
}

/**
 * Turn an href from a Bing result into the destination URL.
 *
 * Bing click-tracking links (`/ck/a?u=...`) carry the target in the `u`
 * parameter, base64-encoded when prefixed with `a1`. When the target cannot
 * be recovered the tracking link itself is returned.
 *
 * @param {string} rawUrl - href attribute value, possibly HTML-escaped.
 * @returns {string} An absolute http(s) URL, or '' if there is none.
 */
function normalizeBingUrl(rawUrl) {
  const url = decodeHtml(rawUrl).trim();
  if (!isHttpUrl(url)) return '';

  const parsed = new URL(url);
  const host = parsed.hostname.toLowerCase();
  // Exact host or a real subdomain; a bare suffix test would accept "notbing.com".
  const isBingHost = host === 'bing.com' || host.endsWith('.bing.com');
  if (!isBingHost || parsed.pathname !== '/ck/a') return url;

  const target = parsed.searchParams.get('u');
  if (!target) return url;

  const decoded = target.startsWith('a1')
    ? Buffer.from(target.slice(2), 'base64').toString('utf8')
    : target;
  return isHttpUrl(decoded) ? decoded : url;
}

/**
 * Extract organic results from a Bing results page.
 *
 * @param {string} html - Raw HTML of the results page.
 * @param {number} count - Maximum number of results to return.
 * @returns {Array<{title: string, url: string, snippet: string, sourceName: string}>}
 *   Results in page order, de-duplicated by URL.
 */
function parseBingResults(html, count) {
  const results = [];
  const seen = new Set();
  const blocks = String(html ?? '').match(
    /<li\b[^>]*\bclass="[^"]*\bb_algo\b[^"]*"[^>]*>[\s\S]*?<\/li>/gi,
  ) || [];

  for (const block of blocks) {
    if (results.length >= count) break;

    const linkMatch = block.match(
      /<h2\b[^>]*>[\s\S]*?<a\b[^>]+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>[\s\S]*?<\/h2>/i,
    );
    if (!linkMatch) continue;

    const url = normalizeBingUrl(linkMatch[1]);
    if (!url || seen.has(url)) continue;
    seen.add(url);

    const snippetMatch = block.match(/<p\b[^>]*>([\s\S]*?)<\/p>/i);
    results.push({
      title: stripTags(linkMatch[2]),
      url,
      snippet: snippetMatch ? stripTags(snippetMatch[1]) : '',
      // normalizeBingUrl only returns parseable absolute URLs.
      sourceName: new URL(url).hostname.replace(/^www\./, ''),
    });
  }
  return results;
}

/**
 * Produce a plain-text summary of an HTML document.
 *
 * @param {string} html - Raw HTML.
 * @param {number} maxChars - Maximum length of the returned text.
 * @returns {{title: string, text: string, truncated: boolean}} `truncated`
 *   is true when the stripped text was longer than `maxChars`.
 */
function summarizeHtml(html, maxChars) {
  const titleMatch = html.match(/<title\b[^>]*>([\s\S]*?)<\/title>/i);
  const title = titleMatch ? stripTags(titleMatch[1]) : '';
  const fullText = stripTags(html);
  return {
    title,
    text: fullText.slice(0, maxChars),
    truncated: fullText.length > maxChars,
  };
}

/**
 * Run a web search and return the parsed results.
 *
 * @param {{query: string, count: number, language: string}} args
 * @returns {Promise<object>} Result document with `mode: 'search'`.
 * @throws {Error} When the query is empty or the request fails.
 */
async function search(args) {
  if (!args.query) {
    throw new Error('Search query is required.');
  }
  const url = buildBingSearchUrl(args.query);
  const fetched = await fetchText(url, args.language);
  return {
    status: 'ok',
    mode: 'search',
    query: args.query,
    provider: 'bing-html',
    fetchedAt: new Date().toISOString(),
    results: parseBingResults(fetched.text, args.count),
  };
}

/**
 * Pretty-print a JSON payload, falling back to the raw text when the body is
 * not valid JSON despite its content type.
 *
 * @param {string} text - Response body.
 * @returns {string}
 */
function formatJsonBody(text) {
  try {
    return JSON.stringify(JSON.parse(text), null, 2);
  } catch {
    return text;
  }
}

/**
 * Fetch a single page and return its text, limited to `args.maxChars`.
 *
 * JSON is pretty-printed, plain text is passed through, and everything else
 * is treated as markup and stripped to text.
 *
 * @param {{fetchUrl: string, language: string, maxChars: number}} args
 * @returns {Promise<object>} Result document with `mode: 'fetch'`; `truncated`
 *   tells whether the returned `text` was cut to fit `maxChars`.
 * @throws {Error} When the URL is not http(s) or the request fails.
 */
async function fetchPage(args) {
  if (!isHttpUrl(args.fetchUrl)) {
    throw new Error('A valid http(s) --fetch-url is required.');
  }
  const fetched = await fetchText(args.fetchUrl, args.language);
  const lowerType = fetched.contentType.toLowerCase();

  let title = '';
  let text;
  let truncated;
  if (lowerType.includes('application/json') || lowerType.includes('text/plain')) {
    const fullText = lowerType.includes('application/json')
      ? formatJsonBody(fetched.text)
      : fetched.text;
    text = fullText.slice(0, args.maxChars);
    truncated = fullText.length > args.maxChars;
  } else {
    const summary = summarizeHtml(fetched.text, args.maxChars);
    text = summary.text;
    // Measured on the stripped text, which is what the caller receives.
    truncated = summary.truncated;
    if (lowerType.includes('text/html')) {
      title = summary.title;
    }
  }

  return {
    status: 'ok',
    mode: 'fetch',
    url: args.fetchUrl,
    finalUrl: fetched.finalUrl,
    httpStatus: fetched.status,
    contentType: fetched.contentType,
    title,
    fetchedAt: new Date().toISOString(),
    text,
    truncated,
  };
}

/** Entry point: run the requested mode and emit the JSON result. */
async function main() {
  const args = parseArgs(process.argv.slice(2));
  const result = args.fetchUrl ? await fetchPage(args) : await search(args);
  const json = `${JSON.stringify(result, null, 2)}\n`;
  if (args.output) {
    await writeFile(args.output, json, 'utf8');
  }
  process.stdout.write(json);
}

main().catch((error) => {
  // Failures are reported on stdout in the same JSON envelope as successes.
  const json = `${JSON.stringify({
    status: 'error',
    error: error instanceof Error ? error.message : String(error),
    fetchedAt: new Date().toISOString(),
  }, null, 2)}\n`;
  process.stdout.write(json);
  process.exitCode = 1;
});