#!/usr/bin/env node
import { createHash, randomUUID } from 'node:crypto';
import { mkdir, writeFile } from 'node:fs/promises';
import { homedir } from 'node:os';
import { dirname, extname, join, resolve } from 'node:path';

/** Number of image candidates returned when --count is absent or not a number. */
const DEFAULT_COUNT = 8;

/** Upper bound applied to --count. */
const MAX_COUNT = 20;

/** Time budget, in milliseconds, after which an HTTP request is aborted. */
const DEFAULT_TIMEOUT_MS = 18_000;

/** Largest image download accepted, in bytes (15 MiB). */
const MAX_DOWNLOAD_BYTES = 15 * 1024 * 1024;

/** Browser-like User-Agent header sent with the search and download requests. */
const USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123 Safari/537.36 ZhinianAssistant/0.1';

/**
 * Parse command-line arguments into a normalized options object.
 *
 * Recognized flags: --count/-n, --download/-d, --out-dir, --output/-o,
 * --safe, --language/--lang and --help/-h. Every other argument is treated
 * as part of the search query (joined with single spaces).
 *
 * Side effect: --help/-h prints the usage text and exits the process with
 * status 0.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{query: string, count: number, download: number, outDir: string,
 *   output: string, safe: string, language: string}} Parsed options, with
 *   `count` clamped to 1..MAX_COUNT and `download` clamped to 0..count.
 */
function parseArgs(argv) {
  // Used when --download is given without an explicit number.
  const bareDownloadCount = 3;

  const args = {
    query: '',
    count: DEFAULT_COUNT,
    download: 0,
    outDir: join(homedir(), '.openclaw', 'media', 'image-search'),
    output: '',
    safe: 'moderate',
    language: 'zh-CN',
  };

  const positional = [];
  for (let index = 0; index < argv.length; index += 1) {
    const arg = argv[index];
    switch (arg) {
      case '--count':
      case '-n':
        args.count = Number.parseInt(argv[++index] || '', 10);
        break;
      case '--download':
      case '-d': {
        const next = argv[index + 1];
        // Only consume the next argument when it really is a number;
        // otherwise `-d cats` would swallow the query word "cats".
        if (next !== undefined && /^\d+$/.test(next)) {
          args.download = Number.parseInt(next, 10);
          index += 1;
        } else {
          // The clamp below bounds this by the final count, so the result
          // does not depend on whether --count appears before or after.
          args.download = bareDownloadCount;
        }
        break;
      }
      case '--out-dir':
        args.outDir = argv[++index] || args.outDir;
        break;
      case '--output':
      case '-o':
        args.output = argv[++index] || '';
        break;
      case '--safe':
        args.safe = argv[++index] || args.safe;
        break;
      case '--language':
      case '--lang':
        args.language = argv[++index] || args.language;
        break;
      case '--help':
      case '-h':
        printHelp();
        process.exit(0);
        break;
      default:
        positional.push(arg);
    }
  }

  args.query = positional.join(' ').trim();
  args.count = Number.isFinite(args.count)
    ? Math.max(1, Math.min(MAX_COUNT, args.count))
    : DEFAULT_COUNT;
  args.download = Number.isFinite(args.download)
    ? Math.max(0, Math.min(args.count, args.download))
    : 0;
  return args;
}

/**
 * Write the command-line usage text to stdout.
 *
 * The text lists every flag that parseArgs recognizes, including the
 * --lang alias and --help.
 */
function printHelp() {
  process.stdout.write(`Usage:
  node search-images.mjs "<query>" [--count 8] [--download 3] [--out-dir <dir>] [--output <file>]

Options:
  --count, -n        Number of image candidates to return, 1-${MAX_COUNT}.
  --download, -d     Download the first N image candidates to local files.
  --out-dir          Directory for downloaded images. Defaults to ~/.openclaw/media/image-search.
  --output, -o       Write JSON result to a file in addition to stdout.
  --safe             Safe search level passed to the image search page. Default: moderate.
  --language, --lang Accept-Language value. Default: zh-CN.
  --help, -h         Show this help text.
`);
}

/**
 * Decode the HTML character references that appear in scraped attribute
 * values: the named entities &quot; &amp; &apos; &lt; &gt; and numeric
 * references in decimal (&#34;) or hexadecimal (&#x27;) form.
 *
 * Decoding happens in a single pass, so text that was escaped twice is
 * unescaped exactly once ("&amp;lt;" becomes "&lt;", not "<").
 *
 * @param {*} value - Value to decode; falsy values yield an empty string.
 * @returns {string} The decoded text.
 */
function decodeHtml(value) {
  const namedEntities = {
    quot: '"',
    amp: '&',
    apos: "'",
    lt: '<',
    gt: '>',
  };
  const entityPattern = /&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|(quot|amp|apos|lt|gt));/g;

  return String(value || '').replace(entityPattern, (entity, decimal, hex, name) => {
    if (name !== undefined) {
      return namedEntities[name];
    }
    const codePoint = decimal !== undefined
      ? Number.parseInt(decimal, 10)
      : Number.parseInt(hex, 16);
    // String.fromCodePoint throws outside the Unicode range; leave such
    // references untouched instead.
    return codePoint > 0 && codePoint <= 0x10ffff
      ? String.fromCodePoint(codePoint)
      : entity;
  });
}

/**
 * Clean a text field taken from search-result metadata: decode HTML
 * entities, drop the private-use characters U+E000 to U+E003 (presumably
 * the markers Bing wraps around highlighted query terms), collapse runs of
 * whitespace to a single space, and trim.
 *
 * @param {*} value - Raw text; falsy values yield an empty string.
 * @returns {string} The cleaned single-line text.
 */
function stripBingHighlights(value) {
  const decoded = decodeHtml(value);
  const withoutMarkers = decoded.replace(/[\uE000-\uE003]/g, '');
  return withoutMarkers.replace(/\s+/g, ' ').trim();
}

/**
 * Derive a display name for a site from a URL: its hostname without a
 * leading "www.".
 *
 * @param {string} url - Absolute URL.
 * @returns {string} The hostname, or an empty string when `url` cannot be
 *   parsed as an absolute URL.
 */
function siteNameFromUrl(url) {
  let hostname;
  try {
    ({ hostname } = new URL(url));
  } catch {
    // Unparseable input simply has no site name.
    return '';
  }
  return hostname.replace(/^www\./, '');
}

/**
 * Map a Content-Type header value to a file extension.
 *
 * Parameters after ";" are ignored and matching is case-insensitive.
 * image/svg+xml is included because the download request advertises it in
 * its Accept header; without an entry such files would be saved as ".jpg".
 *
 * @param {*} contentType - Content-Type value, e.g. "image/png; charset=x".
 * @returns {string} Extension including the dot, or '' for unknown types.
 */
function extFromMime(contentType) {
  const extensionByMime = new Map([
    ['image/jpeg', '.jpg'],
    ['image/jpg', '.jpg'],
    ['image/png', '.png'],
    ['image/webp', '.webp'],
    ['image/gif', '.gif'],
    ['image/avif', '.avif'],
    ['image/svg+xml', '.svg'],
  ]);
  const normalized = String(contentType || '').split(';')[0].trim().toLowerCase();
  return extensionByMime.get(normalized) ?? '';
}

/**
 * Guess an image file extension from the path of a URL.
 *
 * Only known image extensions are returned; ".jpeg" is normalized to
 * ".jpg". The query string and fragment are ignored because only the URL's
 * pathname is inspected.
 *
 * @param {string} url - Absolute URL of the image.
 * @returns {string} Lower-case extension including the dot, or '' when the
 *   URL cannot be parsed or its extension is not a known image type.
 */
function extFromUrl(url) {
  const knownExtensions = ['.jpg', '.jpeg', '.png', '.webp', '.gif', '.avif', '.svg'];

  let pathname;
  try {
    ({ pathname } = new URL(url));
  } catch {
    return '';
  }

  const ext = extname(pathname).toLowerCase();
  if (!knownExtensions.includes(ext)) {
    return '';
  }
  return ext === '.jpeg' ? '.jpg' : ext;
}

/**
 * Derive a short deterministic identifier from a string: the first 12
 * hexadecimal characters of its SHA-1 digest. Used as an identifier only,
 * not for any security purpose.
 *
 * @param {string} value - Text to hash.
 * @returns {string} A 12-character lower-case hex string.
 */
function stableId(value) {
  const digest = createHash('sha1').update(value).digest('hex');
  return digest.slice(0, 12);
}

/**
 * Build the Bing image-search page URL for a query.
 *
 * @param {{query: string, safe?: string}} params - `query` is the search
 *   text; `safe` is sent as the `safeSearch` parameter and falls back to
 *   "moderate" when absent, so the literal text "undefined" is never sent.
 * @returns {string} The absolute URL with properly encoded query parameters.
 */
function buildBingImagesUrl(params) {
  const url = new URL('https://www.bing.com/images/search');
  const query = {
    q: params.query,
    form: 'HDRSC2',
    safeSearch: params.safe ?? 'moderate',
  };
  for (const [key, value] of Object.entries(query)) {
    url.searchParams.set(key, value);
  }
  return url.toString();
}

/**
 * Build an Accept-Language header value from a language setting.
 *
 * A value that already contains "," or ";" is treated as a complete header
 * value and returned unchanged. Otherwise the tag is listed first, followed
 * by its base language (q=0.9) when it has a subtag, and English (q=0.8)
 * when the base language is not English. "zh-CN" therefore yields
 * "zh-CN,zh;q=0.9,en;q=0.8".
 */
function buildAcceptLanguage(language) {
  const tag = String(language || '').trim();
  if (tag === '') {
    return 'zh-CN,zh;q=0.9,en;q=0.8';
  }
  if (/[,;]/.test(tag)) {
    return tag;
  }
  const preferences = [tag];
  const [base] = tag.split('-');
  if (base !== tag) {
    preferences.push(`${base};q=0.9`);
  }
  if (base.toLowerCase() !== 'en') {
    preferences.push('en;q=0.8');
  }
  return preferences.join(',');
}

/**
 * Fetch a URL and return the response body as text.
 *
 * The request is aborted when it (including reading the body) takes longer
 * than `timeoutMs`.
 *
 * @param {string} url - URL to fetch.
 * @param {number} [timeoutMs=DEFAULT_TIMEOUT_MS] - Abort budget in ms.
 * @param {string} [language='zh-CN'] - Preferred language tag, or a complete
 *   Accept-Language header value. The default sends the same header the
 *   function used before this parameter existed.
 * @returns {Promise<string>} The response body.
 * @throws {Error} "HTTP <status> <statusText>" for non-2xx responses; also
 *   rejects with the fetch error on network failure or abort.
 */
async function fetchText(url, timeoutMs = DEFAULT_TIMEOUT_MS, language = 'zh-CN') {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const response = await fetch(url, {
      signal: controller.signal,
      headers: {
        'User-Agent': USER_AGENT,
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': buildAcceptLanguage(language),
      },
    });
    if (!response.ok) {
      throw new Error(`HTTP ${response.status} ${response.statusText}`);
    }
    // Awaited inside the try so the timeout also covers the body download.
    return await response.text();
  } finally {
    clearTimeout(timer);
  }
}

/**
 * Extract image candidates from a Bing image-search HTML page.
 *
 * Every `m="..."` attribute is entity-decoded and parsed as JSON. Entries
 * that are not valid JSON, are not JSON objects, lack a string `murl`, or
 * repeat an already seen `murl` are skipped.
 *
 * @param {string} html - Page markup.
 * @param {number} count - Maximum number of results to return.
 * @returns {Array<{id: string, title: string, description: string,
 *   imageUrl: string, thumbnailUrl: string, sourcePageUrl: string,
 *   sourceName: string, width: (number|undefined),
 *   height: (number|undefined)}>} Candidates in page order.
 */
function parseBingImages(html, count) {
  const trimmedString = (field) => (typeof field === 'string' ? field.trim() : '');
  const finiteNumber = (field) => {
    const parsed = Number(field);
    return Number.isFinite(parsed) ? parsed : undefined;
  };

  const results = [];
  const seen = new Set();

  for (const [, encoded] of String(html).matchAll(/m="([^"]+)"/g)) {
    // Written as a negated "<" so a NaN count yields no results.
    if (!(results.length < count)) break;

    let metadata;
    try {
      metadata = JSON.parse(decodeHtml(encoded));
    } catch {
      // Other attributes ending in m="..." are not JSON; ignore them.
      continue;
    }
    // Valid JSON that is not an object (e.g. m="null") has no fields to read.
    if (metadata === null || typeof metadata !== 'object') continue;

    const imageUrl = trimmedString(metadata.murl);
    if (!imageUrl || seen.has(imageUrl)) continue;
    seen.add(imageUrl);

    const thumbnailUrl = trimmedString(metadata.turl);
    const sourcePageUrl = trimmedString(metadata.purl);

    results.push({
      id: stableId(imageUrl),
      title: stripBingHighlights(metadata.t || metadata.desc || ''),
      description: stripBingHighlights(metadata.desc || ''),
      imageUrl,
      thumbnailUrl,
      sourcePageUrl,
      sourceName: siteNameFromUrl(sourcePageUrl || imageUrl),
      width: finiteNumber(metadata.w),
      height: finiteNumber(metadata.h),
    });
  }

  return results;
}

/**
 * Read a response body into a Buffer, failing as soon as more than
 * `maxBytes` have been received so an oversized body is never fully
 * buffered in memory.
 */
async function readBodyWithLimit(response, maxBytes) {
  if (!response.body) {
    // No stream to iterate: fall back to buffering, then check.
    const whole = Buffer.from(await response.arrayBuffer());
    if (whole.byteLength > maxBytes) {
      throw new Error(`Image too large: ${whole.byteLength} bytes`);
    }
    return whole;
  }

  const chunks = [];
  let received = 0;
  // Leaving the loop by throwing releases the stream through the iterator.
  for await (const chunk of response.body) {
    received += chunk.byteLength;
    if (received > maxBytes) {
      throw new Error(`Image too large: more than ${maxBytes} bytes`);
    }
    chunks.push(chunk);
  }
  return Buffer.concat(chunks, received);
}

/**
 * Download one search result's image into `outDir`.
 *
 * The request is aborted after DEFAULT_TIMEOUT_MS (the budget also covers
 * reading the body). Responses must be successful, declare an image/*
 * content type, and be at most MAX_DOWNLOAD_BYTES. The size is checked
 * against Content-Length when present and enforced again while streaming,
 * because the header can be missing or wrong.
 *
 * The file is named "<NN>-<id><ext>", where NN is the 1-based, zero-padded
 * position and the extension comes from the content type, then the URL,
 * then defaults to ".jpg".
 *
 * @param {{imageUrl: string, sourcePageUrl?: string, id?: string}} result -
 *   Search result to download.
 * @param {string} outDir - Target directory; created if missing.
 * @param {number} index - Zero-based position of the result.
 * @returns {Promise<{localPath: string, mimeType: (string|undefined),
 *   fileSize: number}>} Details of the saved file.
 * @throws {Error} On HTTP errors, non-image responses, oversized bodies,
 *   timeouts, or file-system failures.
 */
async function downloadImage(result, outDir, index) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), DEFAULT_TIMEOUT_MS);
  try {
    const response = await fetch(result.imageUrl, {
      signal: controller.signal,
      redirect: 'follow',
      headers: {
        'User-Agent': USER_AGENT,
        'Accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
        'Referer': result.sourcePageUrl || 'https://www.bing.com/',
      },
    });
    if (!response.ok) {
      throw new Error(`HTTP ${response.status} ${response.statusText}`);
    }

    const contentType = response.headers.get('content-type') || '';
    if (!contentType.toLowerCase().startsWith('image/')) {
      throw new Error(`Not an image response: ${contentType || 'unknown content-type'}`);
    }

    // Cheap early rejection; a missing or unparseable header passes through
    // to the streaming limit below.
    const contentLength = Number(response.headers.get('content-length') || '0');
    if (contentLength > MAX_DOWNLOAD_BYTES) {
      throw new Error(`Image too large: ${contentLength} bytes`);
    }

    const bytes = await readBodyWithLimit(response, MAX_DOWNLOAD_BYTES);

    await mkdir(outDir, { recursive: true });
    const ext = extFromMime(contentType) || extFromUrl(result.imageUrl) || '.jpg';
    const position = String(index + 1).padStart(2, '0');
    const filePath = join(outDir, `${position}-${result.id || randomUUID()}${ext}`);
    await writeFile(filePath, bytes);

    return {
      localPath: filePath,
      mimeType: contentType.split(';')[0].trim() || undefined,
      fileSize: bytes.byteLength,
    };
  } finally {
    clearTimeout(timer);
  }
}

/**
 * Resolve a user-supplied path to an absolute one, expanding a leading "~"
 * (alone or followed by "/") to the home directory. The replacement is a
 * function so "$" sequences in the home path are inserted literally.
 */
function expandHomePath(path) {
  return resolve(path.replace(/^~(?=$|\/)/, () => homedir()));
}

/**
 * Command-line entry point: search for images, optionally download the
 * first few, and print a JSON report.
 *
 * Without a query the usage text is printed and the process exits with
 * status 2. A failed search does not throw; it is reported in `warnings`
 * and yields an empty result list with `success: false`. A failed download
 * is recorded on the affected result as `downloadError`.
 *
 * The report is always written to stdout and, when --output is given, to
 * that file as well (parent directories are created).
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = parseArgs(process.argv.slice(2));
  if (!args.query) {
    printHelp();
    process.exit(2);
  }

  const searchUrl = buildBingImagesUrl(args);
  const startedAt = Date.now();
  const warnings = [
    'Web images may have copyright or usage restrictions. Use sourcePageUrl to verify rights before commercial use.',
  ];

  let results = [];
  try {
    // Forward the --language setting so it reaches the Accept-Language header.
    const html = await fetchText(searchUrl, DEFAULT_TIMEOUT_MS, args.language);
    results = parseBingImages(html, args.count);
  } catch (error) {
    warnings.push(`Image search failed: ${error?.message || String(error)}`);
  }

  const resolvedOutDir = expandHomePath(args.outDir);
  const downloadCount = Math.min(args.download, results.length);
  // Downloads run one at a time; each outcome is merged into its result.
  for (let index = 0; index < downloadCount; index += 1) {
    const candidate = results[index];
    try {
      const download = await downloadImage(candidate, resolvedOutDir, index);
      results[index] = { ...candidate, ...download };
    } catch (error) {
      results[index] = {
        ...candidate,
        downloadError: error?.message || String(error),
      };
    }
  }

  const payload = {
    success: results.length > 0,
    provider: 'bing-images-html',
    query: args.query,
    count: results.length,
    tookMs: Date.now() - startedAt,
    searchUrl,
    warnings,
    results,
  };

  const json = `${JSON.stringify(payload, null, 2)}\n`;
  if (args.output) {
    const outputPath = expandHomePath(args.output);
    await mkdir(dirname(outputPath), { recursive: true });
    await writeFile(outputPath, json, 'utf8');
  }
  process.stdout.write(json);
}

// Script entry point: run main() and turn any unexpected rejection into a
// stderr message (stack when available) and exit status 1.
main().catch((error) => {
  process.stderr.write(`${error?.stack || error?.message || String(error)}\n`);
  process.exit(1);
});