chore: stabilize Zhinian pilot delivery
This commit is contained in:
291
resources/skills/local/image-search/scripts/search-images.mjs
Normal file
291
resources/skills/local/image-search/scripts/search-images.mjs
Normal file
@@ -0,0 +1,291 @@
|
||||
#!/usr/bin/env node
|
||||
import { createHash, randomUUID } from 'node:crypto';
|
||||
import { mkdir, writeFile } from 'node:fs/promises';
|
||||
import { dirname, extname, join, resolve } from 'node:path';
|
||||
import { homedir } from 'node:os';
|
||||
|
||||
/** Number of image candidates returned when --count is not given. */
const DEFAULT_COUNT = 8;

/** Largest value accepted for --count; larger requests are clamped to this. */
const MAX_COUNT = 20;

/** Abort timeout, in milliseconds, applied to each HTTP request. */
const DEFAULT_TIMEOUT_MS = 18_000;

/** Largest image body, in bytes, that a download will accept (15 MiB). */
const MAX_DOWNLOAD_BYTES = 15 * 1024 * 1024;

/** User-Agent header sent with the search request and with every image download. */
const USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123 Safari/537.36 ZhinianAssistant/0.1';
|
||||
|
||||
/**
 * Parse command-line arguments into a normalized options object.
 *
 * Arguments that are not a recognized flag (or the value of one) are joined
 * with spaces to form the search query. `--help` / `-h` prints the usage text
 * and terminates the process with exit code 0.
 *
 * @param {string[]} argv - Arguments after the script name (e.g. `process.argv.slice(2)`).
 * @returns {{query: string, count: number, download: number, outDir: string,
 *   output: string, safe: string, language: string}} Normalized options:
 *   `count` is clamped to 1..MAX_COUNT (DEFAULT_COUNT when not a number) and
 *   `download` is clamped to 0..count (0 when not a number).
 */
function parseArgs(argv) {
  const args = {
    query: '',
    count: DEFAULT_COUNT,
    download: 0,
    outDir: join(homedir(), '.openclaw', 'media', 'image-search'),
    output: '',
    safe: 'moderate',
    language: 'zh-CN',
  };

  const positional = [];
  // True when the last --download had no numeric value. The default is resolved
  // after --count has been normalized, so it does not depend on flag order or
  // on an invalid --count value.
  let useDefaultDownload = false;

  for (let index = 0; index < argv.length; index += 1) {
    const arg = argv[index];
    if (arg === '--count' || arg === '-n') {
      args.count = Number.parseInt(argv[++index] || '', 10);
    } else if (arg === '--download' || arg === '-d') {
      const next = argv[index + 1];
      if (next && !next.startsWith('-')) {
        args.download = Number.parseInt(next, 10);
        useDefaultDownload = false;
        index += 1;
      } else {
        useDefaultDownload = true;
      }
    } else if (arg === '--out-dir') {
      args.outDir = argv[++index] || args.outDir;
    } else if (arg === '--output' || arg === '-o') {
      args.output = argv[++index] || '';
    } else if (arg === '--safe') {
      args.safe = argv[++index] || args.safe;
    } else if (arg === '--language' || arg === '--lang') {
      args.language = argv[++index] || args.language;
    } else if (arg === '--help' || arg === '-h') {
      printHelp();
      process.exit(0);
    } else {
      positional.push(arg);
    }
  }

  args.query = positional.join(' ').trim();
  args.count = Number.isFinite(args.count)
    ? Math.max(1, Math.min(MAX_COUNT, args.count))
    : DEFAULT_COUNT;
  if (useDefaultDownload) {
    // A bare --download means "the first few": at most 3, never more than count.
    args.download = Math.min(args.count, 3);
  }
  args.download = Number.isFinite(args.download)
    ? Math.max(0, Math.min(args.count, args.download))
    : 0;
  return args;
}
|
||||
|
||||
/**
 * Write the command-line usage text to standard output.
 *
 * The text interpolates MAX_COUNT so the documented range for --count always
 * matches the limit enforced by argument parsing.
 */
function printHelp() {
  process.stdout.write(`Usage:
  node search-images.mjs "<query>" [--count 8] [--download 3] [--out-dir <dir>] [--output <file>]

Options:
  --count, -n         Number of image candidates to return, 1-${MAX_COUNT}.
  --download, -d      Download the first N image candidates to local files.
  --out-dir           Directory for downloaded images. Defaults to ~/.openclaw/media/image-search.
  --output, -o        Write JSON result to a file in addition to stdout.
  --safe              Safe search level passed to the image search page. Default: moderate.
  --language, --lang  Accept-Language value. Default: zh-CN.
  --help, -h          Show this help text.
`);
}
|
||||
|
||||
/** HTML entities recognized by decodeHtml, mapped to the character they represent. */
const HTML_ENTITY_CHARS = Object.freeze({
  '&quot;': '"',
  '&#34;': '"',
  '&amp;': '&',
  '&#39;': "'",
  '&lt;': '<',
  '&gt;': '>',
});

/**
 * Decode the small set of HTML entities needed to read attribute values
 * (`&quot;`, `&#34;`, `&amp;`, `&#39;`, `&lt;`, `&gt;`).
 *
 * Decoding is done in a single pass so that already-escaped text such as
 * `&amp;lt;` becomes `&lt;` and is not unescaped a second time.
 *
 * @param {unknown} value - Text to decode; falsy values are treated as an empty string.
 * @returns {string} The decoded text.
 */
function decodeHtml(value) {
  return String(value || '').replace(
    /&(?:quot|#34|amp|#39|lt|gt);/g,
    (entity) => HTML_ENTITY_CHARS[entity],
  );
}
|
||||
|
||||
/**
 * Turn a title or description taken from the search metadata into plain text.
 *
 * HTML entities are decoded, the private-use code points U+E000 through U+E003
 * are removed, runs of whitespace are collapsed to a single space, and the
 * result is trimmed.
 *
 * @param {unknown} value - Raw text; falsy values yield an empty string.
 * @returns {string} The cleaned text.
 */
function stripBingHighlights(value) {
  return decodeHtml(value)
    .replace(/[\uE000-\uE003]/g, '')
    .replace(/\s+/g, ' ')
    .trim();
}
|
||||
|
||||
/**
 * Derive a short site name from a URL: its hostname without a leading `www.`.
 *
 * @param {string} url - Absolute URL.
 * @returns {string} The hostname, or an empty string when `url` cannot be parsed.
 */
function siteNameFromUrl(url) {
  let hostname;
  try {
    ({ hostname } = new URL(url));
  } catch {
    // An unparsable URL is an expected input here; report "no site name".
    return '';
  }
  return hostname.replace(/^www\./, '');
}
|
||||
|
||||
/** File extension used for each image MIME type that extFromMime recognizes. */
const MIME_EXTENSIONS = new Map([
  ['image/jpeg', '.jpg'],
  ['image/jpg', '.jpg'],
  ['image/png', '.png'],
  ['image/webp', '.webp'],
  ['image/gif', '.gif'],
  ['image/avif', '.avif'],
]);

/**
 * Map a Content-Type header value to a file extension.
 *
 * Parameters after `;` are ignored and the comparison is case-insensitive.
 *
 * @param {unknown} contentType - Content-Type value, e.g. `image/png; charset=binary`.
 * @returns {string} The extension including the leading dot, or an empty
 *   string when the type is not one of the recognized image types.
 */
function extFromMime(contentType) {
  const normalized = String(contentType || '').split(';')[0].trim().toLowerCase();
  return MIME_EXTENSIONS.get(normalized) ?? '';
}
|
||||
|
||||
/**
 * Guess an image file extension from the path component of a URL.
 *
 * @param {string} url - Absolute URL of the image.
 * @returns {string} One of `.jpg`, `.png`, `.webp`, `.gif`, `.avif` (`.jpeg`
 *   is reported as `.jpg`), or an empty string when the URL cannot be parsed
 *   or its path does not end in one of those extensions.
 */
function extFromUrl(url) {
  let ext;
  try {
    ext = extname(new URL(url).pathname).toLowerCase();
  } catch {
    // An unparsable URL simply gives no hint about the extension.
    return '';
  }
  if (ext === '.jpeg') return '.jpg';
  return ['.jpg', '.png', '.webp', '.gif', '.avif'].includes(ext) ? ext : '';
}
|
||||
|
||||
/**
 * Compute a short deterministic identifier for a string.
 *
 * @param {string} value - Text to hash (the callers in this file pass an image URL).
 * @returns {string} The first 12 hexadecimal characters of the SHA-1 digest of `value`.
 */
function stableId(value) {
  const digest = createHash('sha1').update(value).digest('hex');
  return digest.slice(0, 12);
}
|
||||
|
||||
/**
 * Build the URL of the Bing image search results page for a query.
 *
 * @param {{query: string, safe?: string}} params - `query` is the search text;
 *   `safe` is sent as the `safeSearch` parameter and falls back to `moderate`
 *   when it is null or undefined, so the literal text "undefined" is never sent.
 * @returns {string} The absolute search URL with all parameters URL-encoded.
 */
function buildBingImagesUrl(params) {
  const url = new URL('https://www.bing.com/images/search');
  url.searchParams.set('q', params.query);
  url.searchParams.set('form', 'HDRSC2');
  url.searchParams.set('safeSearch', params.safe ?? 'moderate');
  return url.toString();
}
|
||||
|
||||
/**
 * Build an Accept-Language header value from a language preference.
 *
 * An empty preference yields the historical default. A value that already
 * looks like a full header (contains `,` or `;`) is used unchanged. A single
 * tag such as `zh-CN` is expanded to `zh-CN,zh;q=0.9,en;q=0.8`.
 */
function buildAcceptLanguage(language) {
  const tag = String(language || '').trim();
  if (!tag) return 'zh-CN,zh;q=0.9,en;q=0.8';
  if (/[,;]/.test(tag)) return tag;

  const parts = [tag];
  const base = tag.split('-')[0];
  if (base && base !== tag) parts.push(`${base};q=0.9`);
  if (base.toLowerCase() !== 'en') parts.push('en;q=0.8');
  return parts.join(',');
}

/**
 * Fetch a URL and return the response body as text.
 *
 * @param {string} url - URL to request.
 * @param {number} [timeoutMs=DEFAULT_TIMEOUT_MS] - Abort the request after this many milliseconds.
 * @param {string} [language=''] - Preferred language for the Accept-Language
 *   header; when empty the header is `zh-CN,zh;q=0.9,en;q=0.8`.
 * @returns {Promise<string>} The response body.
 * @throws {Error} `HTTP <status> <statusText>` for a non-2xx response, a
 *   "timed out" error (original error attached as `cause`) when the timeout
 *   fires, or whatever error `fetch` rejects with.
 */
async function fetchText(url, timeoutMs = DEFAULT_TIMEOUT_MS, language = '') {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const response = await fetch(url, {
      signal: controller.signal,
      headers: {
        'User-Agent': USER_AGENT,
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': buildAcceptLanguage(language),
      },
    });
    if (!response.ok) {
      throw new Error(`HTTP ${response.status} ${response.statusText}`);
    }
    return await response.text();
  } catch (error) {
    // Only this function's timer aborts the controller, so an aborted signal
    // means the timeout fired; say so instead of surfacing a bare abort error.
    if (controller.signal.aborted) {
      throw new Error(`Request timed out after ${timeoutMs} ms`, { cause: error });
    }
    throw error;
  } finally {
    clearTimeout(timer);
  }
}
|
||||
|
||||
/**
 * Extract image candidates from the HTML of a Bing image search results page.
 *
 * Every `m="…"` attribute value is HTML-decoded and parsed as JSON. Values
 * that are not valid JSON, are not JSON objects, or have no `murl` string are
 * skipped. Candidates are de-duplicated by image URL.
 *
 * @param {string} html - Page markup.
 * @param {number} count - Maximum number of candidates to return.
 * @returns {Array<{id: string, title: string, description: string,
 *   imageUrl: string, thumbnailUrl: string, sourcePageUrl: string,
 *   sourceName: string, width: (number|undefined), height: (number|undefined)}>}
 *   Candidates in document order.
 */
function parseBingImages(html, count) {
  const results = [];
  const seen = new Set();
  const trimmedString = (value) => (typeof value === 'string' ? value.trim() : '');
  const finiteNumber = (value) => {
    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : undefined;
  };

  for (const match of String(html ?? '').matchAll(/m="([^"]+)"/g)) {
    if (results.length >= count) break;

    let metadata;
    try {
      metadata = JSON.parse(decodeHtml(match[1]));
    } catch {
      // The pattern also hits unrelated attributes (e.g. `form="…"`); skip them.
      continue;
    }
    // JSON.parse accepts `null`, numbers and strings too; reading `.murl` on
    // null would throw and discard every candidate collected so far.
    if (metadata === null || typeof metadata !== 'object') continue;

    const imageUrl = trimmedString(metadata.murl);
    if (!imageUrl || seen.has(imageUrl)) continue;
    seen.add(imageUrl);

    const thumbnailUrl = trimmedString(metadata.turl);
    const sourcePageUrl = trimmedString(metadata.purl);

    results.push({
      id: stableId(imageUrl),
      title: stripBingHighlights(metadata.t || metadata.desc || ''),
      description: stripBingHighlights(metadata.desc || ''),
      imageUrl,
      thumbnailUrl,
      sourcePageUrl,
      sourceName: siteNameFromUrl(sourcePageUrl || imageUrl),
      width: finiteNumber(metadata.w),
      height: finiteNumber(metadata.h),
    });
  }

  return results;
}
|
||||
|
||||
/**
 * Read a response body into memory, giving up as soon as it exceeds `maxBytes`.
 *
 * Reading incrementally keeps a response with no (or a wrong) Content-Length
 * from being buffered in full before the size limit is applied.
 */
async function readBodyWithLimit(response, maxBytes) {
  if (typeof response.body?.getReader !== 'function') {
    // No readable stream available: fall back to a single buffered read.
    const whole = Buffer.from(await response.arrayBuffer());
    if (whole.byteLength > maxBytes) {
      throw new Error(`Image too large: ${whole.byteLength} bytes`);
    }
    return whole;
  }

  const reader = response.body.getReader();
  const chunks = [];
  let total = 0;
  for (;;) {
    const { done, value } = await reader.read();
    if (done) break;
    total += value.byteLength;
    if (total > maxBytes) {
      // Best-effort release of the connection; the size error is what matters.
      await reader.cancel().catch(() => {});
      throw new Error(`Image too large: more than ${maxBytes} bytes`);
    }
    chunks.push(value);
  }
  return Buffer.concat(chunks, total);
}

/**
 * Download one image candidate and save it under `outDir`.
 *
 * The file is named `<NN>-<id><ext>`, where NN is `index + 1` zero-padded to
 * two digits and the extension comes from the response Content-Type, then from
 * the image URL, then defaults to `.jpg`. `outDir` is created if needed.
 *
 * @param {{imageUrl: string, sourcePageUrl?: string, id?: string}} result - Candidate to download.
 * @param {string} outDir - Directory that receives the file.
 * @param {number} index - Zero-based position of the candidate, used in the file name.
 * @returns {Promise<{localPath: string, mimeType: (string|undefined), fileSize: number}>}
 *   Location, MIME type (without parameters) and size in bytes of the saved file.
 * @throws {Error} On a non-2xx response, a non-image Content-Type, a body
 *   larger than MAX_DOWNLOAD_BYTES, a timeout after DEFAULT_TIMEOUT_MS, or a
 *   network / file-system failure.
 */
async function downloadImage(result, outDir, index) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), DEFAULT_TIMEOUT_MS);
  try {
    const response = await fetch(result.imageUrl, {
      signal: controller.signal,
      redirect: 'follow',
      headers: {
        'User-Agent': USER_AGENT,
        'Accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
        'Referer': result.sourcePageUrl || 'https://www.bing.com/',
      },
    });
    if (!response.ok) {
      throw new Error(`HTTP ${response.status} ${response.statusText}`);
    }

    const contentType = response.headers.get('content-type') || '';
    if (!contentType.toLowerCase().startsWith('image/')) {
      throw new Error(`Not an image response: ${contentType || 'unknown content-type'}`);
    }

    // Cheap early rejection when the server declares the size up front.
    const contentLength = Number(response.headers.get('content-length') || '0');
    if (contentLength > MAX_DOWNLOAD_BYTES) {
      throw new Error(`Image too large: ${contentLength} bytes`);
    }

    const bytes = await readBodyWithLimit(response, MAX_DOWNLOAD_BYTES);

    await mkdir(outDir, { recursive: true });
    const ext = extFromMime(contentType) || extFromUrl(result.imageUrl) || '.jpg';
    const fileName = `${String(index + 1).padStart(2, '0')}-${result.id || randomUUID()}${ext}`;
    const filePath = join(outDir, fileName);
    await writeFile(filePath, bytes);

    return {
      localPath: filePath,
      mimeType: contentType.split(';')[0].trim() || undefined,
      fileSize: bytes.byteLength,
    };
  } catch (error) {
    // Only this function's timer aborts the controller, so an aborted signal
    // means the timeout fired (during the request or while reading the body).
    if (controller.signal.aborted) {
      throw new Error(`Download timed out after ${DEFAULT_TIMEOUT_MS} ms`, { cause: error });
    }
    throw error;
  } finally {
    clearTimeout(timer);
  }
}
|
||||
|
||||
/**
 * Expand a leading `~` (alone or followed by `/`) to the home directory and
 * return the absolute path.
 */
function expandHome(pathValue) {
  // A replacer function keeps `$` sequences in the home path from being
  // interpreted as replacement patterns.
  return resolve(pathValue.replace(/^~(?=$|\/)/, () => homedir()));
}

/**
 * Command-line entry point: search for images, optionally download the first
 * few, and print a JSON report to standard output (and to `--output` if given).
 *
 * Without a query the usage text is printed and the exit code is set to 2.
 * A failed search or download does not reject: a search failure is reported
 * in `warnings`, a download failure in that result's `downloadError`.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = parseArgs(process.argv.slice(2));
  if (!args.query) {
    printHelp();
    // Set the exit code and return instead of calling process.exit(), so help
    // text written to a piped stdout is flushed before the process ends.
    process.exitCode = 2;
    return;
  }

  const searchUrl = buildBingImagesUrl(args);
  const startedAt = Date.now();
  const warnings = [
    'Web images may have copyright or usage restrictions. Use sourcePageUrl to verify rights before commercial use.',
  ];

  let results = [];
  try {
    // Forward the language preference so --language affects the request.
    const html = await fetchText(searchUrl, DEFAULT_TIMEOUT_MS, args.language);
    results = parseBingImages(html, args.count);
    if (results.length === 0) {
      warnings.push('Image search returned no parsable results.');
    }
  } catch (error) {
    warnings.push(`Image search failed: ${error?.message || String(error)}`);
  }

  const resolvedOutDir = expandHome(args.outDir);
  const downloadCount = Math.min(args.download, results.length);
  // Downloads run one at a time; each failure is recorded on its own result.
  for (let index = 0; index < downloadCount; index += 1) {
    try {
      const download = await downloadImage(results[index], resolvedOutDir, index);
      results[index] = { ...results[index], ...download };
    } catch (error) {
      results[index] = {
        ...results[index],
        downloadError: error?.message || String(error),
      };
    }
  }

  const payload = {
    success: results.length > 0,
    provider: 'bing-images-html',
    query: args.query,
    count: results.length,
    tookMs: Date.now() - startedAt,
    searchUrl,
    warnings,
    results,
  };

  const json = `${JSON.stringify(payload, null, 2)}\n`;
  if (args.output) {
    const outputPath = expandHome(args.output);
    await mkdir(dirname(outputPath), { recursive: true });
    await writeFile(outputPath, json, 'utf8');
  }
  process.stdout.write(json);
}
|
||||
|
||||
// Run the CLI. Any error that escapes main() is printed to stderr and turned
// into exit code 1. The exit code is set rather than calling process.exit(),
// so the message is flushed even when stderr is a pipe.
main().catch((error) => {
  process.stderr.write(`${error?.stack || error?.message || String(error)}\n`);
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user