Files
NianToB/resources/skills/local/image-search/scripts/search-images.mjs

659 lines
23 KiB
JavaScript

#!/usr/bin/env node
import { createHash, randomUUID } from 'node:crypto';
import { mkdir, writeFile } from 'node:fs/promises';
import { dirname, extname, join, resolve } from 'node:path';
import { homedir } from 'node:os';
// Number of image candidates returned when --count is absent or not a finite number.
const DEFAULT_COUNT = 8;
// Upper bound that parseArgs clamps --count to.
const MAX_COUNT = 20;
// Abort timeout (milliseconds) used by the fetch helpers and by downloadImage.
const DEFAULT_TIMEOUT_MS = 18_000;
// Largest image body downloadImage accepts: 15 MiB.
const MAX_DOWNLOAD_BYTES = 15 * 1024 * 1024;
// Browser-like User-Agent header sent with every outgoing request.
const USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36';
// Order in which stock providers are tried by the public-page and site-search fallbacks.
const STOCK_PROVIDER_PRIORITY = ['pixabay', 'pexels', 'unsplash'];
// Bing `site:` query prefix for each stock provider.
const STOCK_PROVIDER_SITE_QUERIES = {
pixabay: 'site:pixabay.com',
pexels: 'site:pexels.com',
unsplash: 'site:unsplash.com',
};
// Global (/g) regexes that pull each provider's image-host URLs out of raw HTML.
// Because they carry lastIndex state, parseProviderPageImages resets lastIndex before use.
const STOCK_PROVIDER_IMAGE_HOST_PATTERNS = {
pixabay: /https?:\/\/cdn\.pixabay\.com\/[^"'<>\s\\)]+/g,
pexels: /https?:\/\/images\.pexels\.com\/[^"'<>\s\\)]+/g,
unsplash: /https?:\/\/(?:images|plus)\.unsplash\.com\/[^"'<>\s\\)]+/g,
};
/**
 * Parses command-line arguments into a normalized options object.
 *
 * Recognized flags: --count/-n, --download/-d, --out-dir, --output/-o, --safe,
 * --language/--lang and --help/-h (prints usage and exits with status 0).
 * Every other token becomes part of the search query.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{query: string, count: number, download: number, outDir: string,
 *   output: string, safe: string, language: string}} Options with `count`
 *   clamped to 1..MAX_COUNT and `download` clamped to 0..count.
 */
function parseArgs(argv) {
  const args = {
    query: '',
    count: DEFAULT_COUNT,
    download: 0,
    outDir: join(homedir(), '.openclaw', 'media', 'image-search'),
    output: '',
    safe: 'moderate',
    language: 'zh-CN',
  };
  const positional = [];
  // True when the last --download/-d had no numeric value. The default is
  // resolved after --count has been validated, so an invalid or later --count
  // cannot turn the request into "download nothing".
  let useDefaultDownload = false;
  for (let index = 0; index < argv.length; index += 1) {
    const arg = argv[index];
    if (arg === '--count' || arg === '-n') {
      args.count = Number.parseInt(argv[++index] || '', 10);
    } else if (arg === '--download' || arg === '-d') {
      const next = argv[index + 1];
      // Only consume the next token when it is a plain number; otherwise it
      // is a flag or a query word and must not be swallowed.
      if (typeof next === 'string' && /^\d+$/.test(next)) {
        args.download = Number.parseInt(next, 10);
        useDefaultDownload = false;
        index += 1;
      } else {
        useDefaultDownload = true;
      }
    } else if (arg === '--out-dir') {
      args.outDir = argv[++index] || args.outDir;
    } else if (arg === '--output' || arg === '-o') {
      args.output = argv[++index] || '';
    } else if (arg === '--safe') {
      args.safe = argv[++index] || args.safe;
    } else if (arg === '--language' || arg === '--lang') {
      args.language = argv[++index] || args.language;
    } else if (arg === '--help' || arg === '-h') {
      printHelp();
      process.exit(0);
    } else {
      positional.push(arg);
    }
  }
  args.query = positional.join(' ').trim();
  args.count = Number.isFinite(args.count) ? Math.max(1, Math.min(MAX_COUNT, args.count)) : DEFAULT_COUNT;
  if (useDefaultDownload) {
    // A bare --download fetches up to three of the requested candidates.
    args.download = Math.min(args.count, 3);
  }
  args.download = Number.isFinite(args.download) ? Math.max(0, Math.min(args.count, args.download)) : 0;
  return args;
}
/**
 * Writes the CLI usage text (options, provider priority and optional API-key
 * environment variables) to standard output.
 */
function printHelp() {
  const helpText = `Usage:
node search-images.mjs "<query>" [--count 8] [--download 3] [--out-dir <dir>] [--output <file>]
Options:
--count, -n Number of image candidates to return, 1-${MAX_COUNT}.
--download, -d Download the first N image candidates to local files.
--out-dir Directory for downloaded images. Defaults to ~/.openclaw/media/image-search.
--output, -o Write JSON result to a file in addition to stdout.
--safe Safe search level passed to the image search page. Default: moderate.
--language Accept-Language value. Default: zh-CN.
Provider priority:
1. Pixabay, Pexels, Unsplash official APIs when API keys are available.
2. Pixabay, Pexels, Unsplash site-limited image searches.
3. Generic Bing Images fallback.
Optional API key environment variables:
PIXABAY_API_KEY, PEXELS_API_KEY, UNSPLASH_ACCESS_KEY.
`;
  process.stdout.write(helpText);
}
/**
 * Decodes the small set of HTML entities this script meets in scraped markup:
 * double quote, ampersand, apostrophe, less-than and greater-than.
 *
 * Decoding happens in a single pass so that text produced by one replacement
 * is never decoded a second time (`&amp;lt;` becomes `&lt;`, not `<`).
 *
 * @param {*} value - Raw text; falsy values are treated as an empty string.
 * @returns {string} The text with the known entities replaced.
 */
function decodeHtml(value) {
  const entities = {
    '&quot;': '"',
    '&#34;': '"',
    '&amp;': '&',
    '&#39;': "'",
    '&#x27;': "'",
    '&apos;': "'",
    '&lt;': '<',
    '&gt;': '>',
  };
  return String(value || '').replace(
    /&(?:quot|#34|amp|#39|#x27|apos|lt|gt);/g,
    (entity) => entities[entity],
  );
}
/**
 * Cleans a text snippet: decodes HTML entities, removes the private-use
 * characters U+E000..U+E003, collapses whitespace runs to a single space and
 * trims the ends.
 *
 * @param {*} value - Raw text.
 * @returns {string} The cleaned text.
 */
function stripBingHighlights(value) {
  const decoded = decodeHtml(value);
  const withoutMarkers = decoded.replace(/\uE000|\uE001|\uE002|\uE003/g, '');
  const singleSpaced = withoutMarkers.replace(/\s+/g, ' ');
  return singleSpaced.trim();
}
/**
 * Returns the hostname of a URL without a leading "www." prefix.
 *
 * @param {string} url - Absolute URL.
 * @returns {string} The hostname, or an empty string when the URL cannot be parsed.
 */
function siteNameFromUrl(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return '';
  }
  return parsed.hostname.replace(/^www\./, '');
}
/**
 * Tells whether a URL's host is the given stock provider's domain or one of
 * its subdomains.
 *
 * @param {string} url - Absolute URL to check.
 * @param {string} provider - 'pixabay', 'pexels' or 'unsplash'.
 * @returns {boolean} False for unparsable URLs and unknown providers.
 */
function urlBelongsToProvider(url, provider) {
  let host;
  try {
    host = new URL(url).hostname.replace(/^www\./, '').toLowerCase();
  } catch {
    return false;
  }
  switch (provider) {
    case 'pixabay':
      return host === 'pixabay.com' || host.endsWith('.pixabay.com');
    case 'pexels':
      return host === 'pexels.com' || host.endsWith('.pexels.com');
    case 'unsplash':
      return host === 'unsplash.com' || host.endsWith('.unsplash.com');
    default:
      return false;
  }
}
/**
 * Tells whether a result's source page URL or its image URL belongs to the
 * given stock provider.
 *
 * @param {{sourcePageUrl?: string, imageUrl?: string}} result - Search result.
 * @param {string} provider - Provider key passed on to urlBelongsToProvider.
 * @returns {boolean} True when either URL matches.
 */
function resultBelongsToProvider(result, provider) {
  if (urlBelongsToProvider(result.sourcePageUrl, provider)) {
    return true;
  }
  return urlBelongsToProvider(result.imageUrl, provider);
}
/**
 * Maps an image Content-Type header value to a file extension.
 *
 * @param {*} contentType - Header value; parameters after ';' are ignored.
 * @returns {string} '.jpg', '.png', '.webp', '.gif', '.avif', or '' when the
 *   type is not recognized.
 */
function extFromMime(contentType) {
  const [mediaType] = String(contentType || '').split(';');
  switch (mediaType.trim().toLowerCase()) {
    case 'image/jpeg':
    case 'image/jpg':
      return '.jpg';
    case 'image/png':
      return '.png';
    case 'image/webp':
      return '.webp';
    case 'image/gif':
      return '.gif';
    case 'image/avif':
      return '.avif';
    default:
      return '';
  }
}
/**
 * Derives a known image extension from the path of a URL.
 *
 * @param {string} url - Absolute URL.
 * @returns {string} The lower-cased extension ('.jpeg' is reported as '.jpg'),
 *   or '' when the URL cannot be parsed or the extension is not a known image type.
 */
function extFromUrl(url) {
  let extension;
  try {
    extension = extname(new URL(url).pathname).toLowerCase();
  } catch {
    return '';
  }
  const known = ['.jpg', '.jpeg', '.png', '.webp', '.gif', '.avif'];
  if (!known.includes(extension)) {
    return '';
  }
  if (extension === '.jpeg') {
    return '.jpg';
  }
  return extension;
}
/**
 * Produces a short deterministic identifier: the first 12 hex characters of
 * the SHA-1 digest of the input.
 *
 * Strings and binary views are hashed as-is. Any other value is converted to
 * a string first (null/undefined become ''), so a record that lacks every URL
 * field yields an id instead of making `hash.update` throw.
 *
 * @param {*} value - Value to hash.
 * @returns {string} 12-character lowercase hex id.
 */
function stableId(value) {
  const input = typeof value === 'string' || ArrayBuffer.isView(value)
    ? value
    : String(value ?? '');
  return createHash('sha1').update(input).digest('hex').slice(0, 12);
}
/**
 * Builds the Bing Images search URL for a query.
 *
 * @param {{query: string, safe: string}} params - Search text and the value
 *   sent as the `safeSearch` query parameter.
 * @returns {string} The absolute search URL.
 */
function buildBingImagesUrl(params) {
  const endpoint = new URL('https://www.bing.com/images/search');
  const pairs = [
    ['q', params.query],
    ['form', 'HDRSC2'],
    ['safeSearch', params.safe],
  ];
  for (const [name, value] of pairs) {
    endpoint.searchParams.set(name, value);
  }
  return endpoint.toString();
}
/**
 * Convenience wrapper around buildBingImagesUrl that takes positional arguments.
 *
 * @param {string} query - Search text.
 * @param {string} [safe='moderate'] - Safe-search level.
 * @returns {string} The absolute search URL.
 */
function buildBingImagesUrlForQuery(query, safe = 'moderate') {
  const params = { query, safe };
  return buildBingImagesUrl(params);
}
/**
 * Converts a locale or Accept-Language style value into a language code for
 * the Pixabay API `lang` parameter.
 *
 * Only the primary language subtag of the first entry is used, so inputs such
 * as 'zh-CN', 'pt_BR' or 'fr,en;q=0.8' map to 'zh', 'pt' and 'fr'. Codes that
 * Pixabay does not list as accepted fall back to 'en' rather than being sent
 * to the API.
 *
 * @param {*} language - Locale string; falsy values are treated as empty.
 * @returns {string} A two-letter code from Pixabay's accepted list.
 */
function mapPixabayLanguage(language) {
  // Accepted `lang` values per the Pixabay API documentation.
  const supported = new Set([
    'cs', 'da', 'de', 'en', 'es', 'fr', 'id', 'it', 'hu', 'nl', 'no', 'pl', 'pt',
    'ro', 'sk', 'fi', 'sv', 'tr', 'vi', 'th', 'bg', 'ru', 'el', 'ja', 'ko', 'zh',
  ]);
  const normalized = String(language || '').trim().toLowerCase();
  if (normalized.startsWith('zh')) return 'zh';
  if (normalized.startsWith('en')) return 'en';
  // Leading run of letters = primary subtag; stops at '-', '_', ',', ';', etc.
  const primary = /^[a-z]+/.exec(normalized)?.[0] ?? '';
  return supported.has(primary) ? primary : 'en';
}
/**
 * Turns free text into a lower-case, dash-separated slug. Every run of
 * characters that are neither Unicode letters nor Unicode numbers becomes a
 * single '-', and leading/trailing dashes are removed.
 *
 * @param {*} value - Text to slugify; falsy values are treated as empty.
 * @returns {string} The slug (possibly empty).
 */
function slugifyQuery(value) {
  const lowered = String(value || '').trim().toLowerCase();
  const dashed = lowered.replace(/[^\p{L}\p{N}]+/gu, '-');
  return dashed.replace(/^-+|-+$/g, '');
}
/**
 * Builds the public (non-API) search page URL of a stock provider.
 *
 * @param {string} provider - 'pixabay', 'pexels' or 'unsplash'.
 * @param {string} query - Search text.
 * @returns {string} The search page URL, or '' for an unknown provider.
 */
function buildProviderSearchPageUrl(provider, query) {
  switch (provider) {
    case 'pixabay':
      return `https://pixabay.com/images/search/${encodeURIComponent(query)}/`;
    case 'pexels':
      return `https://www.pexels.com/search/${encodeURIComponent(query)}/`;
    case 'unsplash':
      // Uses the slug form of the query, or the raw query when the slug is empty.
      return `https://unsplash.com/s/photos/${encodeURIComponent(slugifyQuery(query) || query)}`;
    default:
      return '';
  }
}
/**
 * Fetches a URL with browser-like headers and returns the body as text.
 * The request is aborted once `timeoutMs` elapses.
 *
 * The Accept-Language header is a fixed value here; it does not come from a
 * parameter.
 *
 * @param {string} url - URL to fetch.
 * @param {number} [timeoutMs=DEFAULT_TIMEOUT_MS] - Abort timeout in milliseconds.
 * @returns {Promise<string>} The response body.
 * @throws {Error} `HTTP <status> <statusText>` for non-2xx responses; fetch and
 *   abort errors propagate unchanged.
 */
async function fetchText(url, timeoutMs = DEFAULT_TIMEOUT_MS) {
  const aborter = new AbortController();
  const timeoutHandle = setTimeout(() => aborter.abort(), timeoutMs);
  const requestInit = {
    signal: aborter.signal,
    headers: {
      'User-Agent': USER_AGENT,
      'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
      'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
    },
  };
  try {
    const res = await fetch(url, requestInit);
    if (res.ok) {
      return await res.text();
    }
    throw new Error(`HTTP ${res.status} ${res.statusText}`);
  } finally {
    // Always release the timer, whether the request succeeded, failed or was aborted.
    clearTimeout(timeoutHandle);
  }
}
/**
 * Fetches a URL and parses the response body as JSON. The request is aborted
 * once `timeoutMs` elapses.
 *
 * @param {string} url - URL to fetch.
 * @param {object} [options={}] - Extra fetch options. `options.headers` is
 *   merged over the default headers; any `signal` in `options` is replaced by
 *   the timeout signal.
 * @param {number} [timeoutMs=DEFAULT_TIMEOUT_MS] - Abort timeout in milliseconds.
 * @returns {Promise<*>} The parsed JSON body.
 * @throws {Error} `HTTP <status> <statusText>` for non-2xx responses; fetch,
 *   abort and JSON parse errors propagate unchanged.
 */
async function fetchJson(url, options = {}, timeoutMs = DEFAULT_TIMEOUT_MS) {
  const aborter = new AbortController();
  const timeoutHandle = setTimeout(() => aborter.abort(), timeoutMs);
  const defaultHeaders = {
    'User-Agent': USER_AGENT,
    'Accept': 'application/json,text/plain,*/*',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
  };
  const requestInit = {
    ...options,
    signal: aborter.signal,
    // Caller-supplied headers win over the defaults.
    headers: { ...defaultHeaders, ...(options.headers || {}) },
  };
  try {
    const res = await fetch(url, requestInit);
    if (res.ok) {
      return await res.json();
    }
    throw new Error(`HTTP ${res.status} ${res.statusText}`);
  } finally {
    clearTimeout(timeoutHandle);
  }
}
/**
 * Fills in the derived fields of a raw search result so every result has the
 * same shape.
 *
 * Missing fields fall back as follows: `id` to a hash of the image URL,
 * `thumbnailUrl` and `sourcePageUrl` to the image URL, `sourceName` and
 * `sourceProvider` to the host of the source page (or image) URL. Title and
 * description are cleaned with stripBingHighlights.
 *
 * @param {object} result - Raw result; `imageUrl` is expected to be set.
 * @returns {object} Normalized result with keys id, title, description,
 *   imageUrl, thumbnailUrl, sourcePageUrl, sourceName, sourceProvider, width, height.
 */
function normalizeResult(result) {
  const { imageUrl } = result;
  const pageOrImageUrl = result.sourcePageUrl || imageUrl;
  const normalized = {};
  normalized.id = result.id || stableId(imageUrl);
  normalized.title = stripBingHighlights(result.title || '');
  normalized.description = stripBingHighlights(result.description || '');
  normalized.imageUrl = imageUrl;
  normalized.thumbnailUrl = result.thumbnailUrl || imageUrl;
  normalized.sourcePageUrl = pageOrImageUrl;
  normalized.sourceName = result.sourceName || siteNameFromUrl(pageOrImageUrl);
  normalized.sourceProvider = result.sourceProvider || siteNameFromUrl(pageOrImageUrl);
  normalized.width = result.width;
  normalized.height = result.height;
  return normalized;
}
/**
 * Extracts image results from a Bing Images HTML page.
 *
 * Each `m="..."` attribute is HTML-decoded and parsed as JSON; the fields
 * `murl` (image), `turl` (thumbnail), `purl` (source page), `t`/`desc` (text)
 * and `w`/`h` (dimensions) are read from it. Attributes that are not valid
 * JSON, or whose JSON is not an object, are skipped, as are entries without
 * an image URL and duplicates of an image URL already seen.
 *
 * @param {string} html - Page markup.
 * @param {number} count - Maximum number of results to return.
 * @param {string} [sourceProvider='bing'] - Value stored as `sourceProvider`.
 * @returns {object[]} Normalized results, in page order.
 */
function parseBingImages(html, count, sourceProvider = 'bing') {
  const toFiniteNumber = (raw) => (Number.isFinite(Number(raw)) ? Number(raw) : undefined);
  const results = [];
  const seen = new Set();
  const matcher = /m="([^"]+)"/g;
  let match;
  while ((match = matcher.exec(html)) && results.length < count) {
    const raw = decodeHtml(match[1]);
    let metadata;
    try {
      metadata = JSON.parse(raw);
    } catch {
      continue;
    }
    // The pattern also hits unrelated attributes ending in `m=` (e.g. form="…").
    // Valid-but-non-object JSON such as `null` would otherwise throw on
    // property access and abort the whole parse.
    if (metadata === null || typeof metadata !== 'object') continue;
    const imageUrl = typeof metadata.murl === 'string' ? metadata.murl.trim() : '';
    const thumbnailUrl = typeof metadata.turl === 'string' ? metadata.turl.trim() : '';
    const sourcePageUrl = typeof metadata.purl === 'string' ? metadata.purl.trim() : '';
    if (!imageUrl || seen.has(imageUrl)) continue;
    seen.add(imageUrl);
    results.push(normalizeResult({
      id: stableId(imageUrl),
      title: stripBingHighlights(metadata.t || metadata.desc || ''),
      description: stripBingHighlights(metadata.desc || ''),
      imageUrl,
      thumbnailUrl,
      sourcePageUrl,
      sourceName: siteNameFromUrl(sourcePageUrl || imageUrl),
      sourceProvider,
      width: toFiniteNumber(metadata.w),
      height: toFiniteNumber(metadata.h),
    }));
  }
  return results;
}
/**
 * Converts a Pixabay API response into normalized results.
 *
 * Reads `data.hits`, considers at most the first `count` hits and drops those
 * that end up without an image URL.
 *
 * @param {*} data - Parsed API response.
 * @param {number} count - Maximum number of hits to consider.
 * @returns {object[]} Normalized results.
 */
function parsePixabayResults(data, count) {
  const finiteOrUndefined = (raw) => (Number.isFinite(Number(raw)) ? Number(raw) : undefined);
  const hits = Array.isArray(data?.hits) ? data.hits : [];
  const normalized = [];
  for (const hit of hits.slice(0, count)) {
    normalized.push(normalizeResult({
      id: `pixabay-${hit.id || stableId(hit.largeImageURL || hit.webformatURL || hit.pageURL)}`,
      title: hit.tags || 'Pixabay image',
      description: hit.tags || '',
      imageUrl: hit.largeImageURL || hit.webformatURL || '',
      thumbnailUrl: hit.previewURL || hit.webformatURL || '',
      sourcePageUrl: hit.pageURL || '',
      sourceName: 'pixabay.com',
      sourceProvider: 'pixabay',
      width: finiteOrUndefined(hit.imageWidth),
      height: finiteOrUndefined(hit.imageHeight),
    }));
  }
  return normalized.filter((entry) => entry.imageUrl);
}
/**
 * Converts a Pexels API response into normalized results.
 *
 * Reads `data.photos`, considers at most the first `count` photos and drops
 * those that end up without an image URL.
 *
 * @param {*} data - Parsed API response.
 * @param {number} count - Maximum number of photos to consider.
 * @returns {object[]} Normalized results.
 */
function parsePexelsResults(data, count) {
  const finiteOrUndefined = (raw) => (Number.isFinite(Number(raw)) ? Number(raw) : undefined);
  const photos = Array.isArray(data?.photos) ? data.photos : [];
  const normalized = [];
  for (const photo of photos.slice(0, count)) {
    normalized.push(normalizeResult({
      id: `pexels-${photo.id || stableId(photo.url || photo.src?.large2x || photo.src?.original)}`,
      title: photo.alt || 'Pexels photo',
      description: photo.alt || '',
      imageUrl: photo.src?.large2x || photo.src?.original || photo.src?.large || '',
      thumbnailUrl: photo.src?.medium || photo.src?.small || '',
      sourcePageUrl: photo.url || '',
      sourceName: 'pexels.com',
      sourceProvider: 'pexels',
      width: finiteOrUndefined(photo.width),
      height: finiteOrUndefined(photo.height),
    }));
  }
  return normalized.filter((entry) => entry.imageUrl);
}
/**
 * Converts an Unsplash search API response into normalized results.
 *
 * Reads `data.results`, considers at most the first `count` photos and drops
 * those that end up without an image URL.
 *
 * @param {*} data - Parsed API response.
 * @param {number} count - Maximum number of photos to consider.
 * @returns {object[]} Normalized results.
 */
function parseUnsplashResults(data, count) {
  const finiteOrUndefined = (raw) => (Number.isFinite(Number(raw)) ? Number(raw) : undefined);
  const photos = Array.isArray(data?.results) ? data.results : [];
  const normalized = [];
  for (const photo of photos.slice(0, count)) {
    normalized.push(normalizeResult({
      id: `unsplash-${photo.id || stableId(photo.links?.html || photo.urls?.regular)}`,
      title: photo.alt_description || photo.description || 'Unsplash photo',
      description: photo.description || photo.alt_description || '',
      imageUrl: photo.urls?.full || photo.urls?.regular || '',
      thumbnailUrl: photo.urls?.small || photo.urls?.thumb || '',
      sourcePageUrl: photo.links?.html || '',
      sourceName: 'unsplash.com',
      sourceProvider: 'unsplash',
      width: finiteOrUndefined(photo.width),
      height: finiteOrUndefined(photo.height),
    }));
  }
  return normalized.filter((entry) => entry.imageUrl);
}
/**
 * Cleans an image URL scraped from a provider page and applies the provider's
 * sizing/format query parameters.
 *
 * Unsplash URLs get w=1600, auto=format, fit=crop, q=80; Pexels URLs get
 * auto=compress, cs=tinysrgb, w=1600; other providers are left as parsed.
 *
 * @param {string} rawUrl - URL text as found in the markup (may contain HTML entities).
 * @param {string} provider - Provider key.
 * @returns {string} The normalized URL, or the entity-decoded input when it
 *   cannot be parsed as a URL.
 */
function normalizeExtractedImageUrl(rawUrl, provider) {
  const decoded = decodeHtml(rawUrl);
  let parsed;
  try {
    parsed = new URL(decoded);
  } catch {
    return decoded;
  }
  let overrides = [];
  if (provider === 'unsplash') {
    overrides = [['w', '1600'], ['auto', 'format'], ['fit', 'crop'], ['q', '80']];
  } else if (provider === 'pexels') {
    overrides = [['auto', 'compress'], ['cs', 'tinysrgb'], ['w', '1600']];
  }
  for (const [name, value] of overrides) {
    parsed.searchParams.set(name, value);
  }
  return parsed.toString();
}
/**
 * Builds the key used to detect duplicate images: hostname plus path, so the
 * same file requested with different query strings counts once.
 *
 * @param {string} imageUrl - Image URL.
 * @returns {string} `hostname + pathname`, or the input itself when it is not
 *   a parsable URL.
 */
function imageDedupeKey(imageUrl) {
  let parsed;
  try {
    parsed = new URL(imageUrl);
  } catch {
    return imageUrl;
  }
  const { hostname, pathname } = parsed;
  return `${hostname}${pathname}`;
}
/**
 * Scrapes image URLs for one stock provider out of that provider's search
 * page markup.
 *
 * Uses the provider's pattern from STOCK_PROVIDER_IMAGE_HOST_PATTERNS,
 * normalizes each hit, and de-duplicates by host + path.
 *
 * @param {string} html - Page markup.
 * @param {string} provider - Provider key.
 * @param {string} searchUrl - URL of the page, recorded as `sourcePageUrl`.
 * @param {number} count - Maximum number of results to return.
 * @returns {object[]} Normalized results; empty when the provider has no pattern.
 */
function parseProviderPageImages(html, provider, searchUrl, count) {
  const pattern = STOCK_PROVIDER_IMAGE_HOST_PATTERNS[provider];
  if (!pattern) {
    return [];
  }
  // The pattern is a shared /g regex: rewind it so a previous scan cannot
  // make this one start mid-document.
  pattern.lastIndex = 0;
  const markup = decodeHtml(html);
  const seenKeys = new Set();
  const found = [];
  for (
    let hit = pattern.exec(markup);
    hit && found.length < count;
    hit = pattern.exec(markup)
  ) {
    const imageUrl = normalizeExtractedImageUrl(hit[0], provider);
    const dedupeKey = imageDedupeKey(imageUrl);
    if (seenKeys.has(dedupeKey)) {
      continue;
    }
    seenKeys.add(dedupeKey);
    found.push(normalizeResult({
      id: `${provider}-${stableId(dedupeKey)}`,
      title: `${provider} image result`,
      description: '',
      imageUrl,
      thumbnailUrl: imageUrl,
      sourcePageUrl: searchUrl,
      sourceName: `${provider}.com`,
      sourceProvider: provider,
    }));
  }
  return found;
}
/**
 * Searches the Pixabay API when PIXABAY_API_KEY is set.
 *
 * @param {{query: string, safe: string, language: string}} args - Parsed CLI options.
 * @param {number} count - Number of results wanted.
 * @returns {Promise<{provider: string, skipped?: string, searchUrl?: string, results: object[]}>}
 *   A `skipped` record when no key is configured; otherwise the request URL
 *   with the key redacted plus the parsed results.
 * @throws {Error} Propagates fetchJson failures (HTTP, network, timeout, bad JSON).
 */
async function searchPixabay(args, count) {
  const apiKey = process.env.PIXABAY_API_KEY || '';
  if (!apiKey) return { provider: 'pixabay', skipped: 'PIXABAY_API_KEY is not configured', results: [] };
  const url = new URL('https://pixabay.com/api/');
  url.searchParams.set('key', apiKey);
  url.searchParams.set('q', args.query);
  url.searchParams.set('image_type', 'all');
  url.searchParams.set('orientation', 'all');
  url.searchParams.set('safesearch', args.safe === 'off' ? 'false' : 'true');
  url.searchParams.set('lang', mapPixabayLanguage(args.language));
  // The request is sent with per_page of at least 3 even when fewer results are wanted.
  url.searchParams.set('per_page', String(Math.max(3, Math.min(200, count))));
  const data = await fetchJson(url.toString());
  // Redact through searchParams rather than a string replace: the key is
  // percent-encoded inside the URL, so replacing its raw text would miss any
  // key containing encodable characters and leak it into the JSON output.
  const redactedUrl = new URL(url);
  redactedUrl.searchParams.set('key', '***');
  return { provider: 'pixabay', searchUrl: redactedUrl.toString(), results: parsePixabayResults(data, count) };
}
/**
 * Searches the Pexels API when PEXELS_API_KEY is set. The key is sent in the
 * Authorization header, so the returned `searchUrl` contains no secret.
 *
 * @param {{query: string, language: string}} args - Parsed CLI options.
 * @param {number} count - Number of results wanted (request size is 1..80).
 * @returns {Promise<{provider: string, skipped?: string, searchUrl?: string, results: object[]}>}
 *   A `skipped` record when no key is configured; otherwise URL and results.
 * @throws {Error} Propagates fetchJson failures.
 */
async function searchPexels(args, count) {
  const token = process.env.PEXELS_API_KEY || '';
  if (!token) {
    return { provider: 'pexels', skipped: 'PEXELS_API_KEY is not configured', results: [] };
  }
  const perPage = Math.max(1, Math.min(80, count));
  const endpoint = new URL('https://api.pexels.com/v1/search');
  endpoint.searchParams.set('query', args.query);
  endpoint.searchParams.set('per_page', String(perPage));
  endpoint.searchParams.set('locale', args.language || 'zh-CN');
  const requestOptions = { headers: { Authorization: token } };
  const payload = await fetchJson(endpoint.toString(), requestOptions);
  return {
    provider: 'pexels',
    searchUrl: endpoint.toString(),
    results: parsePexelsResults(payload, count),
  };
}
/**
 * Searches the Unsplash API when UNSPLASH_ACCESS_KEY is set. The key is sent
 * as a `Client-ID` Authorization header.
 *
 * @param {{query: string, safe: string}} args - Parsed CLI options; `safe`
 *   of 'off' selects content_filter=low, anything else content_filter=high.
 * @param {number} count - Number of results wanted (request size is 1..30).
 * @returns {Promise<{provider: string, skipped?: string, searchUrl?: string, results: object[]}>}
 *   A `skipped` record when no key is configured; otherwise URL and results.
 * @throws {Error} Propagates fetchJson failures.
 */
async function searchUnsplash(args, count) {
  const accessKey = process.env.UNSPLASH_ACCESS_KEY || '';
  if (!accessKey) {
    return { provider: 'unsplash', skipped: 'UNSPLASH_ACCESS_KEY is not configured', results: [] };
  }
  const perPage = Math.max(1, Math.min(30, count));
  const contentFilter = args.safe === 'off' ? 'low' : 'high';
  const endpoint = new URL('https://api.unsplash.com/search/photos');
  endpoint.searchParams.set('query', args.query);
  endpoint.searchParams.set('per_page', String(perPage));
  endpoint.searchParams.set('content_filter', contentFilter);
  const requestOptions = { headers: { Authorization: `Client-ID ${accessKey}` } };
  const payload = await fetchJson(endpoint.toString(), requestOptions);
  return {
    provider: 'unsplash',
    searchUrl: endpoint.toString(),
    results: parseUnsplashResults(payload, count),
  };
}
/**
 * Fetches a stock provider's public search page and scrapes image URLs from it.
 *
 * @param {{query: string}} args - Parsed CLI options.
 * @param {string} provider - Provider key.
 * @param {number} count - Maximum number of results.
 * @returns {Promise<{provider: string, searchUrl: string, results: object[]}>}
 *   Attempt record labelled `<provider>-public-page`.
 * @throws {Error} Propagates fetchText failures.
 */
async function searchProviderPublicPage(args, provider, count) {
  const pageUrl = buildProviderSearchPageUrl(provider, args.query);
  const markup = await fetchText(pageUrl);
  const results = parseProviderPageImages(markup, provider, pageUrl, count);
  return { provider: `${provider}-public-page`, searchUrl: pageUrl, results };
}
/**
 * Runs a Bing Images search restricted to one stock provider's site and keeps
 * only results whose URLs really belong to that provider.
 *
 * @param {{query: string, safe: string}} args - Parsed CLI options.
 * @param {string} provider - Provider key.
 * @param {number} count - Maximum number of results.
 * @returns {Promise<{provider: string, searchUrl: string, results: object[]}>}
 *   Attempt record labelled `<provider>-site-search`.
 * @throws {Error} Propagates fetchText failures.
 */
async function searchBingSiteProvider(args, provider, count) {
  const sitePrefix = STOCK_PROVIDER_SITE_QUERIES[provider];
  const searchUrl = buildBingImagesUrlForQuery(`${sitePrefix} ${args.query}`, args.safe);
  const markup = await fetchText(searchUrl);
  // Over-fetch candidates because the provider filter below discards some of them.
  const candidateLimit = Math.max(count * 5, 30);
  const candidates = parseBingImages(markup, candidateLimit, provider);
  const matching = candidates.filter((candidate) => resultBelongsToProvider(candidate, provider));
  return {
    provider: `${provider}-site-search`,
    searchUrl,
    results: matching.slice(0, count),
  };
}
/**
 * Runs an unrestricted Bing Images search as the last-resort provider.
 *
 * @param {{query: string, safe: string}} args - Parsed CLI options.
 * @param {number} count - Maximum number of results.
 * @returns {Promise<{provider: string, searchUrl: string, results: object[]}>}
 *   Attempt record labelled `bing-images-html`.
 * @throws {Error} Propagates fetchText failures.
 */
async function searchGenericBing(args, count) {
  const searchUrl = buildBingImagesUrl(args);
  const markup = await fetchText(searchUrl);
  const results = parseBingImages(markup, count, 'bing');
  return { provider: 'bing-images-html', searchUrl, results };
}
/**
 * Appends results to `target` in place, skipping entries without an image URL
 * and entries whose image URL is already present, and never letting `target`
 * grow beyond `maxCount`.
 *
 * The cap is checked before each push, so a target that is already full is
 * left untouched.
 *
 * @param {object[]} target - Accumulated results; mutated.
 * @param {Iterable<object>|null|undefined} incoming - Candidate results.
 * @param {number} maxCount - Maximum length allowed for `target`.
 * @returns {void}
 */
function appendUniqueResults(target, incoming, maxCount) {
  const seen = new Set(target.map((result) => result.imageUrl));
  for (const result of incoming ?? []) {
    if (target.length >= maxCount) break;
    if (!result?.imageUrl || seen.has(result.imageUrl)) continue;
    seen.add(result.imageUrl);
    target.push(result);
  }
}
/**
 * Collects up to `args.count` unique image results by trying providers in
 * priority order and stopping as soon as enough results are gathered:
 *   1. official APIs (Pixabay, Pexels, Unsplash),
 *   2. each provider's public search page,
 *   3. Bing Images restricted to each provider's site,
 *   4. unrestricted Bing Images.
 *
 * Every step is recorded in `attempts`, including failures. Only API failures
 * and a failure of the final generic search are also appended to `warnings`.
 *
 * @param {object} args - Parsed CLI options (`count`, `query`, ...).
 * @param {string[]} warnings - Warning list; mutated.
 * @returns {Promise<{results: object[], attempts: object[]}>} Gathered results
 *   and the per-step attempt log.
 */
async function searchImages(args, warnings) {
  const results = [];
  const attempts = [];

  // Runs one search step: logs its outcome, merges its hits, and converts a
  // thrown error into a failed attempt (plus an optional warning).
  const runStep = async ({ invoke, failureLabel, keepSkipped = false, toWarning = null }) => {
    try {
      const attempt = await invoke(args.count - results.length);
      const entry = { provider: attempt.provider, searchUrl: attempt.searchUrl };
      if (keepSkipped) {
        entry.skipped = attempt.skipped;
      }
      entry.count = attempt.results.length;
      attempts.push(entry);
      appendUniqueResults(results, attempt.results, args.count);
    } catch (error) {
      const message = error?.message || String(error);
      attempts.push({ provider: failureLabel, error: message, count: 0 });
      if (toWarning) {
        warnings.push(toWarning(message));
      }
    }
  };

  // 1. Official APIs (each reports `skipped` when its key is not configured).
  for (const searcher of [searchPixabay, searchPexels, searchUnsplash]) {
    if (results.length >= args.count) break;
    const label = searcher.name.replace(/^search/, '').toLowerCase();
    await runStep({
      invoke: (remaining) => searcher(args, remaining),
      failureLabel: label,
      keepSkipped: true,
      toWarning: (message) => `${label} image search failed: ${message}`,
    });
  }

  // 2. Public search pages.
  for (const provider of STOCK_PROVIDER_PRIORITY) {
    if (results.length >= args.count) break;
    await runStep({
      invoke: (remaining) => searchProviderPublicPage(args, provider, remaining),
      failureLabel: `${provider}-public-page`,
    });
  }

  // 3. Site-restricted Bing searches.
  for (const provider of STOCK_PROVIDER_PRIORITY) {
    if (results.length >= args.count) break;
    await runStep({
      invoke: (remaining) => searchBingSiteProvider(args, provider, remaining),
      failureLabel: `${provider}-site-search`,
    });
  }

  // 4. Unrestricted Bing search.
  if (results.length < args.count) {
    await runStep({
      invoke: (remaining) => searchGenericBing(args, remaining),
      failureLabel: 'bing-images-html',
      toWarning: (message) => `Generic image search failed: ${message}`,
    });
  }

  return { results, attempts };
}
/**
 * Downloads one search result's image into `outDir`.
 *
 * The response must be 2xx with an `image/*` content type. The body is read
 * incrementally and the download stops as soon as MAX_DOWNLOAD_BYTES is
 * exceeded, so a server that omits or understates Content-Length cannot make
 * the process buffer an arbitrarily large body. The whole operation, body
 * read included, is bounded by DEFAULT_TIMEOUT_MS.
 *
 * The file is named `<NN>-<id><ext>`. The id comes from remote data, so every
 * character outside [A-Za-z0-9._-] is replaced with '_' to keep the file
 * inside `outDir`.
 *
 * @param {{imageUrl: string, sourcePageUrl?: string, id?: string}} result - Result to download.
 * @param {string} outDir - Destination directory; created if missing.
 * @param {number} index - Zero-based position, used for the numeric file prefix.
 * @returns {Promise<{localPath: string, mimeType: (string|undefined), fileSize: number}>}
 *   Where the file was written, its media type and its size in bytes.
 * @throws {Error} On non-2xx status, non-image content type, oversized body,
 *   timeout/abort, or file system errors.
 */
async function downloadImage(result, outDir, index) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), DEFAULT_TIMEOUT_MS);
  try {
    const response = await fetch(result.imageUrl, {
      signal: controller.signal,
      redirect: 'follow',
      headers: {
        'User-Agent': USER_AGENT,
        'Accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
        'Referer': result.sourcePageUrl || 'https://www.bing.com/',
      },
    });
    if (!response.ok) {
      throw new Error(`HTTP ${response.status} ${response.statusText}`);
    }
    const contentType = response.headers.get('content-type') || '';
    if (!contentType.toLowerCase().startsWith('image/')) {
      throw new Error(`Not an image response: ${contentType || 'unknown content-type'}`);
    }
    // Cheap early rejection when the server announces the size up front.
    const contentLength = Number(response.headers.get('content-length') || '0');
    if (contentLength > MAX_DOWNLOAD_BYTES) {
      throw new Error(`Image too large: ${contentLength} bytes`);
    }
    // Stream the body so the cap is enforced while reading, not afterwards.
    const chunks = [];
    let received = 0;
    if (response.body) {
      for await (const chunk of response.body) {
        received += chunk.byteLength;
        if (received > MAX_DOWNLOAD_BYTES) {
          // Throwing out of the loop cancels the underlying stream.
          throw new Error(`Image too large: exceeds ${MAX_DOWNLOAD_BYTES} bytes`);
        }
        chunks.push(chunk);
      }
    }
    const bytes = Buffer.concat(chunks);
    await mkdir(outDir, { recursive: true });
    const ext = extFromMime(contentType) || extFromUrl(result.imageUrl) || '.jpg';
    // Ids can originate from remote API payloads; strip path separators and
    // anything else that is not a plain file-name character.
    const safeId = String(result.id || randomUUID()).replace(/[^A-Za-z0-9._-]+/g, '_');
    const fileName = `${String(index + 1).padStart(2, '0')}-${safeId}${ext}`;
    const filePath = join(outDir, fileName);
    await writeFile(filePath, bytes);
    return {
      localPath: filePath,
      mimeType: contentType.split(';')[0].trim() || undefined,
      fileSize: bytes.byteLength,
    };
  } finally {
    clearTimeout(timer);
  }
}
/**
 * CLI entry point: parses arguments, runs the provider cascade, optionally
 * downloads the first results, and prints the JSON payload to stdout (also
 * writing it to `--output` when given).
 *
 * Prints usage and exits with status 2 when no query was supplied. A failed
 * download does not abort the run; the error text is stored on that result as
 * `downloadError`.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = parseArgs(process.argv.slice(2));
  if (!args.query) {
    printHelp();
    process.exit(2);
  }
  const startedAt = Date.now();
  // Expands a leading "~" (alone or followed by "/") to the home directory and
  // makes the path absolute.
  const toAbsolutePath = (value) => resolve(value.replace(/^~(?=$|\/)/, homedir()));
  const warnings = [
    'Web images may have copyright or usage restrictions. Use sourcePageUrl to verify rights before commercial use.',
  ];
  const { results, attempts } = await searchImages(args, warnings);
  const resolvedOutDir = toAbsolutePath(args.outDir);
  const downloadCount = Math.min(args.download, results.length);
  let index = 0;
  while (index < downloadCount) {
    const candidate = results[index];
    try {
      const saved = await downloadImage(candidate, resolvedOutDir, index);
      results[index] = { ...candidate, ...saved };
    } catch (error) {
      results[index] = { ...candidate, downloadError: error?.message || String(error) };
    }
    index += 1;
  }
  const payload = {
    success: results.length > 0,
    provider: 'stock-priority',
    providerPriority: [...STOCK_PROVIDER_PRIORITY, 'bing-images-html'],
    query: args.query,
    count: results.length,
    tookMs: Date.now() - startedAt,
    searchAttempts: attempts,
    warnings,
    results,
  };
  const json = `${JSON.stringify(payload, null, 2)}\n`;
  if (args.output) {
    const outputPath = toAbsolutePath(args.output);
    await mkdir(dirname(outputPath), { recursive: true });
    await writeFile(outputPath, json, 'utf8');
  }
  process.stdout.write(json);
}
// Run the CLI. Any error that escapes main() is reported on stderr (stack when
// available, otherwise the message) and the process exits with status 1.
main().catch((error) => {
process.stderr.write(`${error?.stack || error?.message || String(error)}\n`);
process.exit(1);
});