#!/usr/bin/env node
/**
 * Image search CLI.
 *
 * Collects image candidates for a query from stock-photo providers first
 * (official APIs when keys are configured, then their public search pages,
 * then site-limited Bing Images searches) and finally from a generic Bing
 * Images search. Optionally downloads the first N candidates. The result is
 * printed to stdout as JSON and can additionally be written to a file.
 */
import { createHash, randomUUID } from 'node:crypto';
import { mkdir, writeFile } from 'node:fs/promises';
import { dirname, extname, join, resolve } from 'node:path';
import { homedir } from 'node:os';

const DEFAULT_COUNT = 8;
const MAX_COUNT = 20;
// Number of images downloaded when --download is given without a value.
const DEFAULT_DOWNLOAD_COUNT = 3;
const DEFAULT_TIMEOUT_MS = 18_000;
const MAX_DOWNLOAD_BYTES = 15 * 1024 * 1024;
const DEFAULT_LANGUAGE = 'zh-CN';
const DEFAULT_ACCEPT_LANGUAGE = 'zh-CN,zh;q=0.9,en;q=0.8';
const USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36';

const STOCK_PROVIDER_PRIORITY = ['pixabay', 'pexels', 'unsplash'];

const STOCK_PROVIDER_SITE_QUERIES = {
  pixabay: 'site:pixabay.com',
  pexels: 'site:pexels.com',
  unsplash: 'site:unsplash.com',
};

const STOCK_PROVIDER_IMAGE_HOST_PATTERNS = {
  pixabay: /https?:\/\/cdn\.pixabay\.com\/[^"'<>\s\\)]+/g,
  pexels: /https?:\/\/images\.pexels\.com\/[^"'<>\s\\)]+/g,
  unsplash: /https?:\/\/(?:images|plus)\.unsplash\.com\/[^"'<>\s\\)]+/g,
};

// Entities decoded by decodeHtml. They are matched in a single pass so that
// an escaped ampersand such as "&amp;lt;" decodes to "&lt;" and not to "<".
const HTML_ENTITY_REPLACEMENTS = Object.freeze({
  '&quot;': '"',
  '&#34;': '"',
  '&amp;': '&',
  '&#39;': "'",
  '&lt;': '<',
  '&gt;': '>',
});
const HTML_ENTITY_PATTERN = /&(?:quot|#34|amp|#39|lt|gt);/g;

/**
 * Parses command-line arguments into an options object.
 *
 * Prints the help text and exits with status 0 when --help/-h is present.
 *
 * @param {string[]} argv - Arguments after the script path.
 * @returns {{query: string, count: number, download: number, outDir: string,
 *   output: string, safe: string, language: string}} Parsed options; `count`
 *   is clamped to 1..MAX_COUNT and `download` to 0..count.
 */
function parseArgs(argv) {
  const args = {
    query: '',
    count: DEFAULT_COUNT,
    download: 0,
    outDir: join(homedir(), '.openclaw', 'media', 'image-search'),
    output: '',
    safe: 'moderate',
    language: DEFAULT_LANGUAGE,
  };
  const positional = [];

  for (let index = 0; index < argv.length; index += 1) {
    const arg = argv[index];
    if (arg === '--count' || arg === '-n') {
      args.count = Number.parseInt(argv[++index] || '', 10);
    } else if (arg === '--download' || arg === '-d') {
      const next = argv[index + 1];
      // Only consume the next token when it is a plain number; otherwise it
      // is a query word (or another flag) and must not be swallowed.
      if (next !== undefined && /^\d+$/.test(next)) {
        args.download = Number.parseInt(next, 10);
        index += 1;
      } else {
        // Clamped to the final count below, so flag order does not matter.
        args.download = DEFAULT_DOWNLOAD_COUNT;
      }
    } else if (arg === '--out-dir') {
      args.outDir = argv[++index] || args.outDir;
    } else if (arg === '--output' || arg === '-o') {
      args.output = argv[++index] || '';
    } else if (arg === '--safe') {
      args.safe = argv[++index] || args.safe;
    } else if (arg === '--language' || arg === '--lang') {
      args.language = argv[++index] || args.language;
    } else if (arg === '--help' || arg === '-h') {
      printHelp();
      process.exit(0);
    } else {
      positional.push(arg);
    }
  }

  args.query = positional.join(' ').trim();
  args.count = Number.isFinite(args.count)
    ? Math.max(1, Math.min(MAX_COUNT, args.count))
    : DEFAULT_COUNT;
  args.download = Number.isFinite(args.download)
    ? Math.max(0, Math.min(args.count, args.download))
    : 0;
  return args;
}

/** Writes the usage text to stdout. */
function printHelp() {
  process.stdout.write(`Usage:
  node search-images.mjs "<query>" [--count 8] [--download 3] [--out-dir <dir>] [--output <file>]

Options:
  --count, -n       Number of image candidates to return, 1-${MAX_COUNT}.
  --download, -d    Download the first N image candidates to local files (N defaults to ${DEFAULT_DOWNLOAD_COUNT}).
  --out-dir         Directory for downloaded images. Defaults to ~/.openclaw/media/image-search.
  --output, -o      Write JSON result to a file in addition to stdout.
  --safe            Safe search level passed to the image search page. Default: moderate.
  --language, --lang  Accept-Language value. Default: ${DEFAULT_LANGUAGE}.

Provider priority:
  1. Pixabay, Pexels, Unsplash official APIs when API keys are available.
  2. Pixabay, Pexels, Unsplash public search pages.
  3. Pixabay, Pexels, Unsplash site-limited image searches.
  4. Generic Bing Images fallback.

Optional API key environment variables:
  PIXABAY_API_KEY, PEXELS_API_KEY, UNSPLASH_ACCESS_KEY.
`);
}

/**
 * Decodes the small set of HTML entities listed in HTML_ENTITY_REPLACEMENTS.
 *
 * @param {unknown} value - Text to decode; falsy values become ''.
 * @returns {string} The decoded text.
 */
function decodeHtml(value) {
  return String(value || '').replace(
    HTML_ENTITY_PATTERN,
    (entity) => HTML_ENTITY_REPLACEMENTS[entity],
  );
}

/**
 * Decodes HTML entities, removes the U+E000..U+E003 marker characters and
 * collapses whitespace runs to single spaces.
 *
 * @param {unknown} value - Raw title or description text.
 * @returns {string} Cleaned, trimmed text.
 */
function stripBingHighlights(value) {
  return decodeHtml(value)
    .replace(/[\uE000-\uE003]/g, '')
    .replace(/\s+/g, ' ')
    .trim();
}

/**
 * @param {string} url - Absolute URL.
 * @returns {string} Hostname without a leading "www.", or '' if unparsable.
 */
function siteNameFromUrl(url) {
  try {
    return new URL(url).hostname.replace(/^www\./, '');
  } catch {
    return '';
  }
}

/**
 * Checks whether a URL is hosted on a stock provider's domain or subdomain.
 *
 * @param {string} url - Absolute URL to test.
 * @param {string} provider - 'pixabay', 'pexels' or 'unsplash'.
 * @returns {boolean} False for unknown providers and unparsable URLs.
 */
function urlBelongsToProvider(url, provider) {
  if (!STOCK_PROVIDER_PRIORITY.includes(provider)) return false;
  try {
    const hostname = new URL(url).hostname.replace(/^www\./, '').toLowerCase();
    const domain = `${provider}.com`;
    return hostname === domain || hostname.endsWith(`.${domain}`);
  } catch {
    return false;
  }
}

/**
 * @param {{sourcePageUrl: string, imageUrl: string}} result - Search result.
 * @param {string} provider - Stock provider key.
 * @returns {boolean} True when the source page or the image is on the provider.
 */
function resultBelongsToProvider(result, provider) {
  return urlBelongsToProvider(result.sourcePageUrl, provider)
    || urlBelongsToProvider(result.imageUrl, provider);
}

/**
 * Maps an image Content-Type header to a file extension.
 *
 * @param {string} contentType - Header value, parameters allowed.
 * @returns {string} Extension including the dot, or '' when unknown.
 */
function extFromMime(contentType) {
  const normalized = String(contentType || '').split(';')[0].trim().toLowerCase();
  if (normalized === 'image/jpeg' || normalized === 'image/jpg') return '.jpg';
  if (normalized === 'image/png') return '.png';
  if (normalized === 'image/webp') return '.webp';
  if (normalized === 'image/gif') return '.gif';
  if (normalized === 'image/avif') return '.avif';
  return '';
}

/**
 * Derives a known image extension from a URL path.
 *
 * @param {string} url - Absolute URL.
 * @returns {string} Extension ('.jpeg' is normalized to '.jpg'), or ''.
 */
function extFromUrl(url) {
  try {
    const ext = extname(new URL(url).pathname).toLowerCase();
    if (['.jpg', '.jpeg', '.png', '.webp', '.gif', '.avif'].includes(ext)) {
      return ext === '.jpeg' ? '.jpg' : ext;
    }
  } catch {
    return '';
  }
  return '';
}

/**
 * @param {unknown} value - Value to hash; null/undefined hash as ''.
 * @returns {string} First 12 hex characters of the SHA-1 of the value.
 */
function stableId(value) {
  // createHash().update() throws on undefined, which would abort parsing of
  // a whole provider response over one malformed entry.
  return createHash('sha1').update(String(value ?? '')).digest('hex').slice(0, 12);
}

/**
 * @param {{query: string, safe: string}} params - Search parameters.
 * @returns {string} Bing Images search URL.
 */
function buildBingImagesUrl(params) {
  const url = new URL('https://www.bing.com/images/search');
  url.searchParams.set('q', params.query);
  url.searchParams.set('form', 'HDRSC2');
  url.searchParams.set('safeSearch', params.safe);
  return url.toString();
}

/**
 * @param {string} query - Search text.
 * @param {string} [safe='moderate'] - Safe search level.
 * @returns {string} Bing Images search URL.
 */
function buildBingImagesUrlForQuery(query, safe = 'moderate') {
  return buildBingImagesUrl({ query, safe });
}

/**
 * @param {string} language - Language tag such as 'zh-CN'.
 * @returns {string} Primary language subtag for the Pixabay `lang`
 *   parameter; 'en' when empty.
 */
function mapPixabayLanguage(language) {
  const normalized = String(language || '').toLowerCase();
  if (normalized.startsWith('zh')) return 'zh';
  if (normalized.startsWith('en')) return 'en';
  return normalized.split(/[-_]/)[0] || 'en';
}

/**
 * @param {string} value - Query text.
 * @returns {string} Lower-cased text with every run of non-letter/non-digit
 *   characters replaced by a single '-', without leading/trailing dashes.
 */
function slugifyQuery(value) {
  return String(value || '')
    .trim()
    .toLowerCase()
    .replace(/[^\p{L}\p{N}]+/gu, '-')
    .replace(/^-+|-+$/g, '');
}

/**
 * @param {string} provider - Stock provider key.
 * @param {string} query - Search text.
 * @returns {string} The provider's public search page URL, or '' if unknown.
 */
function buildProviderSearchPageUrl(provider, query) {
  if (provider === 'pixabay') {
    return `https://pixabay.com/images/search/${encodeURIComponent(query)}/`;
  }
  if (provider === 'pexels') {
    return `https://www.pexels.com/search/${encodeURIComponent(query)}/`;
  }
  if (provider === 'unsplash') {
    return `https://unsplash.com/s/photos/${encodeURIComponent(slugifyQuery(query) || query)}`;
  }
  return '';
}

/**
 * Builds an Accept-Language header value from a language tag.
 *
 * @param {string} language - Language tag such as 'zh-CN' or 'en'.
 * @returns {string} The tag, then its primary subtag (q=0.9) when different,
 *   then English (q=0.8) when the primary subtag is not 'en'. Falls back to
 *   DEFAULT_ACCEPT_LANGUAGE when the tag is not a plain language tag, so an
 *   arbitrary CLI value can never produce an invalid header.
 */
function buildAcceptLanguage(language) {
  const tag = String(language || '').trim();
  if (!/^[A-Za-z]{1,8}(?:-[A-Za-z0-9]{1,8})*$/.test(tag)) {
    return DEFAULT_ACCEPT_LANGUAGE;
  }
  const primary = tag.split('-')[0];
  const preferences = [tag];
  if (primary.toLowerCase() !== tag.toLowerCase()) preferences.push(`${primary};q=0.9`);
  if (primary.toLowerCase() !== 'en') preferences.push('en;q=0.8');
  return preferences.join(',');
}

/**
 * Runs `task` with an AbortSignal that fires after `timeoutMs`.
 *
 * @template T
 * @param {number} timeoutMs - Time budget in milliseconds.
 * @param {(signal: AbortSignal) => Promise<T>} task - Work to run.
 * @returns {Promise<T>} The task's result; the timer is always cleared.
 */
async function withTimeout(timeoutMs, task) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    return await task(controller.signal);
  } finally {
    clearTimeout(timer);
  }
}

/**
 * Fetches a URL and returns the response body as text.
 *
 * @param {string} url - URL to fetch.
 * @param {number} [timeoutMs=DEFAULT_TIMEOUT_MS] - Abort after this long.
 * @param {string} [language=DEFAULT_LANGUAGE] - Tag for Accept-Language.
 * @returns {Promise<string>} Response text.
 * @throws {Error} On a non-2xx status, network failure or timeout.
 */
async function fetchText(url, timeoutMs = DEFAULT_TIMEOUT_MS, language = DEFAULT_LANGUAGE) {
  return withTimeout(timeoutMs, async (signal) => {
    const response = await fetch(url, {
      signal,
      headers: {
        'User-Agent': USER_AGENT,
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': buildAcceptLanguage(language),
      },
    });
    if (!response.ok) {
      throw new Error(`HTTP ${response.status} ${response.statusText}`);
    }
    return response.text();
  });
}

/**
 * Fetches a URL and parses the response body as JSON.
 *
 * @param {string} url - URL to fetch.
 * @param {object} [options={}] - Extra fetch options; `options.headers`
 *   override the default headers.
 * @param {number} [timeoutMs=DEFAULT_TIMEOUT_MS] - Abort after this long.
 * @param {string} [language=DEFAULT_LANGUAGE] - Tag for Accept-Language.
 * @returns {Promise<any>} Parsed JSON.
 * @throws {Error} On a non-2xx status, network failure, timeout or bad JSON.
 */
async function fetchJson(url, options = {}, timeoutMs = DEFAULT_TIMEOUT_MS, language = DEFAULT_LANGUAGE) {
  return withTimeout(timeoutMs, async (signal) => {
    const response = await fetch(url, {
      ...options,
      signal,
      headers: {
        'User-Agent': USER_AGENT,
        'Accept': 'application/json,text/plain,*/*',
        'Accept-Language': buildAcceptLanguage(language),
        ...(options.headers || {}),
      },
    });
    if (!response.ok) {
      throw new Error(`HTTP ${response.status} ${response.statusText}`);
    }
    return response.json();
  });
}

/**
 * @param {unknown} value - Candidate numeric value.
 * @returns {number|undefined} `Number(value)` when finite, else undefined.
 */
function toFiniteNumber(value) {
  const parsed = Number(value);
  return Number.isFinite(parsed) ? parsed : undefined;
}

/**
 * Fills in defaults so every result has the same shape.
 *
 * @param {object} result - Partial result; `imageUrl` is the fallback for
 *   the id, thumbnail, source page and source names.
 * @returns {object} Result with id, title, description, imageUrl,
 *   thumbnailUrl, sourcePageUrl, sourceName, sourceProvider, width, height.
 */
function normalizeResult(result) {
  const originUrl = result.sourcePageUrl || result.imageUrl;
  return {
    id: result.id || stableId(result.imageUrl),
    title: stripBingHighlights(result.title || ''),
    description: stripBingHighlights(result.description || ''),
    imageUrl: result.imageUrl,
    thumbnailUrl: result.thumbnailUrl || result.imageUrl,
    sourcePageUrl: originUrl,
    sourceName: result.sourceName || siteNameFromUrl(originUrl),
    sourceProvider: result.sourceProvider || siteNameFromUrl(originUrl),
    width: result.width,
    height: result.height,
  };
}

/**
 * Extracts image results from a Bing Images HTML page by reading the
 * entity-encoded JSON stored in `m="..."` attributes.
 *
 * @param {string} html - Page HTML.
 * @param {number} count - Maximum number of results.
 * @param {string} [sourceProvider='bing'] - Provider label for the results.
 * @returns {object[]} Normalized results, de-duplicated by image URL.
 */
function parseBingImages(html, count, sourceProvider = 'bing') {
  const results = [];
  const seen = new Set();

  for (const match of String(html).matchAll(/m="([^"]+)"/g)) {
    if (results.length >= count) break;

    let metadata;
    try {
      metadata = JSON.parse(decodeHtml(match[1]));
    } catch {
      // Not every m="..." attribute holds image metadata; skip the others.
      continue;
    }
    // JSON.parse can also yield null or a primitive, which carries no fields.
    if (metadata === null || typeof metadata !== 'object') continue;

    const imageUrl = typeof metadata.murl === 'string' ? metadata.murl.trim() : '';
    const thumbnailUrl = typeof metadata.turl === 'string' ? metadata.turl.trim() : '';
    const sourcePageUrl = typeof metadata.purl === 'string' ? metadata.purl.trim() : '';
    if (!imageUrl || seen.has(imageUrl)) continue;
    seen.add(imageUrl);

    results.push(normalizeResult({
      id: stableId(imageUrl),
      title: stripBingHighlights(metadata.t || metadata.desc || ''),
      description: stripBingHighlights(metadata.desc || ''),
      imageUrl,
      thumbnailUrl,
      sourcePageUrl,
      sourceName: siteNameFromUrl(sourcePageUrl || imageUrl),
      sourceProvider,
      width: toFiniteNumber(metadata.w),
      height: toFiniteNumber(metadata.h),
    }));
  }
  return results;
}

/**
 * @param {object} data - Pixabay API response; reads `data.hits`.
 * @param {number} count - Maximum number of hits to convert.
 * @returns {object[]} Normalized results that have an image URL.
 */
function parsePixabayResults(data, count) {
  const hits = Array.isArray(data?.hits) ? data.hits : [];
  return hits
    .slice(0, count)
    .map((hit) => normalizeResult({
      id: `pixabay-${hit.id || stableId(hit.largeImageURL || hit.webformatURL || hit.pageURL)}`,
      title: hit.tags || 'Pixabay image',
      description: hit.tags || '',
      imageUrl: hit.largeImageURL || hit.webformatURL || '',
      thumbnailUrl: hit.previewURL || hit.webformatURL || '',
      sourcePageUrl: hit.pageURL || '',
      sourceName: 'pixabay.com',
      sourceProvider: 'pixabay',
      width: toFiniteNumber(hit.imageWidth),
      height: toFiniteNumber(hit.imageHeight),
    }))
    .filter((result) => result.imageUrl);
}

/**
 * @param {object} data - Pexels API response; reads `data.photos`.
 * @param {number} count - Maximum number of photos to convert.
 * @returns {object[]} Normalized results that have an image URL.
 */
function parsePexelsResults(data, count) {
  const photos = Array.isArray(data?.photos) ? data.photos : [];
  return photos
    .slice(0, count)
    .map((photo) => normalizeResult({
      id: `pexels-${photo.id || stableId(photo.url || photo.src?.large2x || photo.src?.original)}`,
      title: photo.alt || 'Pexels photo',
      description: photo.alt || '',
      imageUrl: photo.src?.large2x || photo.src?.original || photo.src?.large || '',
      thumbnailUrl: photo.src?.medium || photo.src?.small || '',
      sourcePageUrl: photo.url || '',
      sourceName: 'pexels.com',
      sourceProvider: 'pexels',
      width: toFiniteNumber(photo.width),
      height: toFiniteNumber(photo.height),
    }))
    .filter((result) => result.imageUrl);
}

/**
 * @param {object} data - Unsplash API response; reads `data.results`.
 * @param {number} count - Maximum number of photos to convert.
 * @returns {object[]} Normalized results that have an image URL.
 */
function parseUnsplashResults(data, count) {
  const photos = Array.isArray(data?.results) ? data.results : [];
  return photos
    .slice(0, count)
    .map((photo) => normalizeResult({
      id: `unsplash-${photo.id || stableId(photo.links?.html || photo.urls?.regular)}`,
      title: photo.alt_description || photo.description || 'Unsplash photo',
      description: photo.description || photo.alt_description || '',
      imageUrl: photo.urls?.full || photo.urls?.regular || '',
      thumbnailUrl: photo.urls?.small || photo.urls?.thumb || '',
      sourcePageUrl: photo.links?.html || '',
      sourceName: 'unsplash.com',
      sourceProvider: 'unsplash',
      width: toFiniteNumber(photo.width),
      height: toFiniteNumber(photo.height),
    }))
    .filter((result) => result.imageUrl);
}

/**
 * Decodes an image URL scraped from a provider page and, for Unsplash and
 * Pexels, overrides its sizing/format query parameters.
 *
 * @param {string} rawUrl - URL text as found in the page.
 * @param {string} provider - Stock provider key.
 * @returns {string} Normalized URL, or the decoded text if it does not parse.
 */
function normalizeExtractedImageUrl(rawUrl, provider) {
  const decoded = decodeHtml(rawUrl);
  try {
    const url = new URL(decoded);
    if (provider === 'unsplash') {
      url.searchParams.set('w', '1600');
      url.searchParams.set('auto', 'format');
      url.searchParams.set('fit', 'crop');
      url.searchParams.set('q', '80');
    }
    if (provider === 'pexels') {
      url.searchParams.set('auto', 'compress');
      url.searchParams.set('cs', 'tinysrgb');
      url.searchParams.set('w', '1600');
    }
    return url.toString();
  } catch {
    return decoded;
  }
}

/**
 * @param {string} imageUrl - Image URL.
 * @returns {string} Hostname plus path (query ignored), so size variants of
 *   one image share a key; the input itself if it does not parse.
 */
function imageDedupeKey(imageUrl) {
  try {
    const url = new URL(imageUrl);
    return `${url.hostname}${url.pathname}`;
  } catch {
    return imageUrl;
  }
}

/**
 * Scrapes image URLs on the provider's image host out of a search page.
 *
 * @param {string} html - Page HTML.
 * @param {string} provider - Stock provider key.
 * @param {string} searchUrl - Page URL, recorded as each result's source page.
 * @param {number} count - Maximum number of results.
 * @returns {object[]} Normalized results, de-duplicated by host and path.
 */
function parseProviderPageImages(html, provider, searchUrl, count) {
  const pattern = STOCK_PROVIDER_IMAGE_HOST_PATTERNS[provider];
  if (!pattern) return [];

  const seen = new Set();
  const results = [];
  // matchAll works on a copy of the shared /g regex, so its lastIndex is
  // never left in a stale state between calls.
  for (const match of decodeHtml(html).matchAll(pattern)) {
    if (results.length >= count) break;
    const imageUrl = normalizeExtractedImageUrl(match[0], provider);
    const key = imageDedupeKey(imageUrl);
    if (seen.has(key)) continue;
    seen.add(key);
    results.push(normalizeResult({
      id: `${provider}-${stableId(key)}`,
      title: `${provider} image result`,
      description: '',
      imageUrl,
      thumbnailUrl: imageUrl,
      sourcePageUrl: searchUrl,
      sourceName: `${provider}.com`,
      sourceProvider: provider,
    }));
  }
  return results;
}

/**
 * Searches the Pixabay API; skipped when PIXABAY_API_KEY is not set.
 *
 * @param {object} args - Parsed CLI options (query, safe, language).
 * @param {number} count - Number of results wanted.
 * @returns {Promise<object>} `{provider, results}` plus `searchUrl` (with the
 *   key redacted) or `skipped`.
 */
async function searchPixabay(args, count) {
  const apiKey = process.env.PIXABAY_API_KEY || '';
  if (!apiKey) {
    return { provider: 'pixabay', skipped: 'PIXABAY_API_KEY is not configured', results: [] };
  }

  const url = new URL('https://pixabay.com/api/');
  url.searchParams.set('key', apiKey);
  url.searchParams.set('q', args.query);
  url.searchParams.set('image_type', 'all');
  url.searchParams.set('orientation', 'all');
  url.searchParams.set('safesearch', args.safe === 'off' ? 'false' : 'true');
  url.searchParams.set('lang', mapPixabayLanguage(args.language));
  url.searchParams.set('per_page', String(Math.max(3, Math.min(200, count))));

  const data = await fetchJson(url.toString(), {}, DEFAULT_TIMEOUT_MS, args.language);

  // Redact through searchParams: a plain string replace misses the key when
  // it is percent-encoded in the serialized URL, leaking it into the output.
  const redactedUrl = new URL(url);
  redactedUrl.searchParams.set('key', '***');
  return {
    provider: 'pixabay',
    searchUrl: redactedUrl.toString(),
    results: parsePixabayResults(data, count),
  };
}

/**
 * Searches the Pexels API; skipped when PEXELS_API_KEY is not set.
 *
 * @param {object} args - Parsed CLI options (query, language).
 * @param {number} count - Number of results wanted.
 * @returns {Promise<object>} `{provider, results}` plus `searchUrl` or `skipped`.
 */
async function searchPexels(args, count) {
  const apiKey = process.env.PEXELS_API_KEY || '';
  if (!apiKey) {
    return { provider: 'pexels', skipped: 'PEXELS_API_KEY is not configured', results: [] };
  }

  const url = new URL('https://api.pexels.com/v1/search');
  url.searchParams.set('query', args.query);
  url.searchParams.set('per_page', String(Math.max(1, Math.min(80, count))));
  url.searchParams.set('locale', args.language || 'zh-CN');

  const data = await fetchJson(
    url.toString(),
    { headers: { Authorization: apiKey } },
    DEFAULT_TIMEOUT_MS,
    args.language,
  );
  return { provider: 'pexels', searchUrl: url.toString(), results: parsePexelsResults(data, count) };
}

/**
 * Searches the Unsplash API; skipped when UNSPLASH_ACCESS_KEY is not set.
 *
 * @param {object} args - Parsed CLI options (query, safe, language).
 * @param {number} count - Number of results wanted.
 * @returns {Promise<object>} `{provider, results}` plus `searchUrl` or `skipped`.
 */
async function searchUnsplash(args, count) {
  const apiKey = process.env.UNSPLASH_ACCESS_KEY || '';
  if (!apiKey) {
    return { provider: 'unsplash', skipped: 'UNSPLASH_ACCESS_KEY is not configured', results: [] };
  }

  const url = new URL('https://api.unsplash.com/search/photos');
  url.searchParams.set('query', args.query);
  url.searchParams.set('per_page', String(Math.max(1, Math.min(30, count))));
  url.searchParams.set('content_filter', args.safe === 'off' ? 'low' : 'high');

  const data = await fetchJson(
    url.toString(),
    { headers: { Authorization: `Client-ID ${apiKey}` } },
    DEFAULT_TIMEOUT_MS,
    args.language,
  );
  return { provider: 'unsplash', searchUrl: url.toString(), results: parseUnsplashResults(data, count) };
}

/**
 * Scrapes a stock provider's public search page.
 *
 * @param {object} args - Parsed CLI options (query, language).
 * @param {string} provider - Stock provider key.
 * @param {number} count - Number of results wanted.
 * @returns {Promise<object>} `{provider: '<name>-public-page', searchUrl, results}`.
 */
async function searchProviderPublicPage(args, provider, count) {
  const searchUrl = buildProviderSearchPageUrl(provider, args.query);
  const html = await fetchText(searchUrl, DEFAULT_TIMEOUT_MS, args.language);
  return {
    provider: `${provider}-public-page`,
    searchUrl,
    results: parseProviderPageImages(html, provider, searchUrl, count),
  };
}

/**
 * Runs a `site:`-restricted Bing Images search for a stock provider and
 * keeps only results whose source page or image is on that provider.
 *
 * @param {object} args - Parsed CLI options (query, safe, language).
 * @param {string} provider - Stock provider key.
 * @param {number} count - Number of results wanted.
 * @returns {Promise<object>} `{provider: '<name>-site-search', searchUrl, results}`.
 */
async function searchBingSiteProvider(args, provider, count) {
  const query = `${STOCK_PROVIDER_SITE_QUERIES[provider]} ${args.query}`;
  const searchUrl = buildBingImagesUrlForQuery(query, args.safe);
  const html = await fetchText(searchUrl, DEFAULT_TIMEOUT_MS, args.language);
  // Over-fetch because many candidates are dropped by the provider filter.
  const results = parseBingImages(html, Math.max(count * 5, 30), provider)
    .filter((result) => resultBelongsToProvider(result, provider))
    .slice(0, count);
  return { provider: `${provider}-site-search`, searchUrl, results };
}

/**
 * Runs an unrestricted Bing Images search.
 *
 * @param {object} args - Parsed CLI options (query, safe, language).
 * @param {number} count - Number of results wanted.
 * @returns {Promise<object>} `{provider: 'bing-images-html', searchUrl, results}`.
 */
async function searchGenericBing(args, count) {
  const searchUrl = buildBingImagesUrl(args);
  const html = await fetchText(searchUrl, DEFAULT_TIMEOUT_MS, args.language);
  return {
    provider: 'bing-images-html',
    searchUrl,
    results: parseBingImages(html, count, 'bing'),
  };
}

/**
 * Appends results whose image URL is not yet in `target`, in place.
 *
 * @param {object[]} target - Accumulated results (mutated).
 * @param {object[]} incoming - New candidates.
 * @param {number} maxCount - Stop once `target` reaches this length.
 */
function appendUniqueResults(target, incoming, maxCount) {
  const seen = new Set(target.map((result) => result.imageUrl));
  for (const result of incoming) {
    if (!result?.imageUrl || seen.has(result.imageUrl)) continue;
    seen.add(result.imageUrl);
    target.push(result);
    if (target.length >= maxCount) break;
  }
}

/**
 * Runs the search stages in priority order until `args.count` results are
 * collected: provider APIs, provider public pages, site-limited Bing
 * searches, then generic Bing.
 *
 * A failing stage is recorded in `attempts` and never aborts the run. Only
 * API and generic-Bing failures are also pushed to `warnings`.
 *
 * @param {object} args - Parsed CLI options.
 * @param {string[]} warnings - Warning list (mutated).
 * @returns {Promise<{results: object[], attempts: object[]}>}
 */
async function searchImages(args, warnings) {
  const apiSearchers = [
    ['pixabay', searchPixabay],
    ['pexels', searchPexels],
    ['unsplash', searchUnsplash],
  ];
  const stages = [
    ...apiSearchers.map(([provider, search]) => ({
      provider,
      warningLabel: `${provider} image search`,
      search: (remaining) => search(args, remaining),
    })),
    ...STOCK_PROVIDER_PRIORITY.map((provider) => ({
      provider: `${provider}-public-page`,
      search: (remaining) => searchProviderPublicPage(args, provider, remaining),
    })),
    ...STOCK_PROVIDER_PRIORITY.map((provider) => ({
      provider: `${provider}-site-search`,
      search: (remaining) => searchBingSiteProvider(args, provider, remaining),
    })),
    {
      provider: 'bing-images-html',
      warningLabel: 'Generic image search',
      search: (remaining) => searchGenericBing(args, remaining),
    },
  ];

  const results = [];
  const attempts = [];
  // Stages run one after another on purpose: each one only asks for the
  // results still missing, and later stages are skipped once enough exist.
  for (const stage of stages) {
    if (results.length >= args.count) break;
    try {
      const attempt = await stage.search(args.count - results.length);
      attempts.push({
        provider: attempt.provider,
        searchUrl: attempt.searchUrl,
        skipped: attempt.skipped,
        count: attempt.results.length,
      });
      appendUniqueResults(results, attempt.results, args.count);
    } catch (error) {
      const message = error?.message || String(error);
      attempts.push({ provider: stage.provider, error: message, count: 0 });
      if (stage.warningLabel) {
        warnings.push(`${stage.warningLabel} failed: ${message}`);
      }
    }
  }
  return { results, attempts };
}

/**
 * Reads a response body, failing as soon as it exceeds `maxBytes`.
 *
 * @param {Response} response - Fetch response.
 * @param {number} maxBytes - Size limit in bytes.
 * @returns {Promise<Buffer>} The complete body.
 * @throws {Error} When the body is larger than `maxBytes`.
 */
async function readBodyWithLimit(response, maxBytes) {
  if (!response.body) {
    const bytes = Buffer.from(await response.arrayBuffer());
    if (bytes.byteLength > maxBytes) {
      throw new Error(`Image too large: ${bytes.byteLength} bytes`);
    }
    return bytes;
  }

  const chunks = [];
  let total = 0;
  // Streaming keeps memory bounded when the server sends no (or a wrong)
  // Content-Length; throwing out of the loop cancels the stream.
  for await (const chunk of response.body) {
    total += chunk.byteLength;
    if (total > maxBytes) {
      throw new Error(`Image too large: exceeds ${maxBytes} bytes`);
    }
    chunks.push(chunk);
  }
  return Buffer.concat(chunks);
}

/**
 * Downloads one result's image into `outDir`.
 *
 * @param {object} result - Result with `imageUrl`, `sourcePageUrl` and `id`.
 * @param {string} outDir - Target directory; created when missing.
 * @param {number} index - Zero-based position, used as a file name prefix.
 * @returns {Promise<{localPath: string, mimeType: (string|undefined),
 *   fileSize: number}>}
 * @throws {Error} On HTTP errors, non-image responses, empty bodies, bodies
 *   over MAX_DOWNLOAD_BYTES, timeouts or file system errors.
 */
async function downloadImage(result, outDir, index) {
  const { bytes, contentType } = await withTimeout(DEFAULT_TIMEOUT_MS, async (signal) => {
    const response = await fetch(result.imageUrl, {
      signal,
      redirect: 'follow',
      headers: {
        'User-Agent': USER_AGENT,
        'Accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
        'Referer': result.sourcePageUrl || 'https://www.bing.com/',
      },
    });
    if (!response.ok) {
      throw new Error(`HTTP ${response.status} ${response.statusText}`);
    }
    const type = response.headers.get('content-type') || '';
    if (!type.toLowerCase().startsWith('image/')) {
      throw new Error(`Not an image response: ${type || 'unknown content-type'}`);
    }
    const contentLength = Number(response.headers.get('content-length') || '0');
    if (contentLength > MAX_DOWNLOAD_BYTES) {
      throw new Error(`Image too large: ${contentLength} bytes`);
    }
    return { bytes: await readBodyWithLimit(response, MAX_DOWNLOAD_BYTES), contentType: type };
  });

  if (bytes.byteLength === 0) {
    throw new Error('Empty image response');
  }

  await mkdir(outDir, { recursive: true });
  const ext = extFromMime(contentType) || extFromUrl(result.imageUrl) || '.jpg';
  // Ids can come from remote APIs; keep path separators and other unsafe
  // characters out of the file name.
  const safeId = String(result.id || randomUUID()).replace(/[^A-Za-z0-9._-]+/g, '_');
  const filePath = join(outDir, `${String(index + 1).padStart(2, '0')}-${safeId}${ext}`);
  await writeFile(filePath, bytes);

  return {
    localPath: filePath,
    mimeType: contentType.split(';')[0].trim() || undefined,
    fileSize: bytes.byteLength,
  };
}

/**
 * @param {string} path - Path that may start with '~' or '~/'.
 * @returns {string} Absolute path with a leading '~' replaced by the home
 *   directory.
 */
function expandHome(path) {
  // A replacer function keeps '$' sequences in the home directory from
  // being read as replacement patterns.
  return resolve(String(path).replace(/^~(?=$|\/)/, () => homedir()));
}

/**
 * CLI entry point: parses arguments, searches, optionally downloads, then
 * prints the JSON payload to stdout (and to --output when given). Prints the
 * help text and exits with status 2 when no query is supplied.
 */
async function main() {
  const args = parseArgs(process.argv.slice(2));
  if (!args.query) {
    printHelp();
    process.exit(2);
  }

  const startedAt = Date.now();
  const warnings = [
    'Web images may have copyright or usage restrictions. Use sourcePageUrl to verify rights before commercial use.',
  ];
  const { results, attempts } = await searchImages(args, warnings);

  const resolvedOutDir = expandHome(args.outDir);
  const downloadCount = Math.min(args.download, results.length);
  for (let index = 0; index < downloadCount; index += 1) {
    try {
      const download = await downloadImage(results[index], resolvedOutDir, index);
      results[index] = { ...results[index], ...download };
    } catch (error) {
      // A failed download is reported on the result instead of aborting.
      results[index] = {
        ...results[index],
        downloadError: error?.message || String(error),
      };
    }
  }

  const payload = {
    success: results.length > 0,
    provider: 'stock-priority',
    providerPriority: [...STOCK_PROVIDER_PRIORITY, 'bing-images-html'],
    query: args.query,
    count: results.length,
    tookMs: Date.now() - startedAt,
    searchAttempts: attempts,
    warnings,
    results,
  };
  const json = `${JSON.stringify(payload, null, 2)}\n`;

  if (args.output) {
    const outputPath = expandHome(args.output);
    await mkdir(dirname(outputPath), { recursive: true });
    await writeFile(outputPath, json, 'utf8');
  }
  process.stdout.write(json);
}

main().catch((error) => {
  process.stderr.write(`${error?.stack || error?.message || String(error)}\n`);
  process.exit(1);
});