661 lines
25 KiB
JavaScript
661 lines
25 KiB
JavaScript
#!/usr/bin/env node
|
||
import fs from "node:fs";
|
||
import path from "node:path";
|
||
import http from "node:http";
|
||
import { spawn } from "node:child_process";
|
||
|
||
// Repository root: two directory levels above the directory containing this script.
// URL#pathname is percent-encoded, so it is decoded before being handed to `path`;
// otherwise a checkout path with spaces or non-ASCII characters resolves to a
// directory that does not exist.
const ROOT = path.resolve(decodeURIComponent(new URL("..", import.meta.url).pathname), "..");
// Input directory; overridable through the TRAVEL_DELIVERY_ROOT environment variable.
const BASE_DIR = process.env.TRAVEL_DELIVERY_ROOT || path.join(ROOT, "data", "source", "travel_delivery_20260602");
// All generated CSV/Markdown files and the request cache are written below this directory.
const OUT_DIR = path.join(BASE_DIR, "amap_js_enriched");
const CACHE_FILE = path.join(OUT_DIR, "_amap_js_cache.json");
// Chrome executable, its throw-away profile directory, and the remote-debugging port.
const CHROME = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome";
const PROFILE = "/tmp/znkg-amap-js-enrich-profile";
const DEBUG_PORT = 9231;

// Source CSV files read by main().
const SCENIC_FILE = path.join(BASE_DIR, "scenic_for_amap.csv");
const HOTEL_FILE = path.join(BASE_DIR, "hotel_poi.csv");
const RESTAURANT_FILE = path.join(BASE_DIR, "restaurant_poi.csv");
|
||
|
||
/**
 * Read one `KEY=value` entry from a dotenv-style text file.
 *
 * The first line whose trimmed text starts with `${key}=` wins. Everything after
 * the first `=` is trimmed and stripped of one leading and one trailing quote
 * character (single or double).
 *
 * @param {string} file - Path of the env file; a missing file is treated as empty.
 * @param {string} key - Variable name to look up.
 * @returns {string} The value, or "" when the file or the key is absent.
 */
function readEnvKey(file, key) {
  if (!fs.existsSync(file)) return "";
  const prefix = `${key}=`;
  const entry = fs
    .readFileSync(file, "utf8")
    .split(/\r?\n/)
    .find((candidate) => candidate.trim().startsWith(prefix));
  if (entry === undefined) return "";
  const rawValue = entry.slice(entry.indexOf("=") + 1);
  return rawValue.trim().replace(/^["']|["']$/g, "");
}
|
||
|
||
/**
 * Parse CSV text into a header list and an array of row objects.
 *
 * A leading BOM is removed. Double-quoted cells may contain commas, line breaks
 * and doubled quotes (`""` -> `"`). Carriage returns outside quotes are ignored.
 * The first record is the header; records whose cells are all blank are dropped,
 * and missing cells become "".
 *
 * @param {string} text - Raw CSV content.
 * @returns {{headers: string[], rows: Object<string, string>[]}}
 */
function parseCsv(text) {
  const source = text.replace(/^\uFEFF/, "");
  const table = [];
  let record = [];
  let field = "";
  let inQuotes = false;

  const endField = () => {
    record.push(field);
    field = "";
  };
  const endRecord = () => {
    endField();
    table.push(record);
    record = [];
  };

  let pos = 0;
  while (pos < source.length) {
    const ch = source[pos];
    pos += 1;
    if (inQuotes) {
      if (ch !== '"') {
        field += ch;
      } else if (source[pos] === '"') {
        // A doubled quote inside a quoted cell is one literal quote.
        field += '"';
        pos += 1;
      } else {
        inQuotes = false;
      }
      continue;
    }
    switch (ch) {
      case '"':
        inQuotes = true;
        break;
      case ",":
        endField();
        break;
      case "\n":
        endRecord();
        break;
      case "\r":
        break;
      default:
        field += ch;
    }
  }
  // Flush a final record that is not terminated by a newline.
  if (field || record.length) endRecord();

  const [headers = [], ...body] = table;
  const isBlank = (value) => !String(value || "").trim();
  const rows = body
    .filter((cells) => !cells.every(isBlank))
    .map((cells) => Object.fromEntries(headers.map((header, idx) => [header, cells[idx] ?? ""])));
  return { headers, rows };
}
|
||
|
||
/**
 * Render one value as a CSV cell.
 *
 * `null`/`undefined` become an empty cell. Text containing a quote, comma, CR or
 * LF is wrapped in double quotes with embedded quotes doubled.
 *
 * @param {*} value - Cell value; converted with String().
 * @returns {string} CSV-safe cell text.
 */
function csvEscape(value) {
  const text = String(value ?? "");
  const needsQuoting = /[",\r\n]/.test(text);
  if (!needsQuoting) return text;
  return `"${text.replaceAll('"', '""')}"`;
}
|
||
|
||
/**
 * Write row objects to a UTF-8 CSV file with a BOM, creating parent directories.
 *
 * Column order is: `preferred` first (duplicates ignored), then every other key
 * in the order it is first seen while scanning `rows`.
 *
 * @param {string} file - Destination path.
 * @param {Object[]} rows - Records to serialise; missing keys yield empty cells.
 * @param {string[]} [preferred=[]] - Column names that should come first.
 * @returns {void}
 */
function writeCsv(file, rows, preferred = []) {
  fs.mkdirSync(path.dirname(file), { recursive: true });

  // A Set keeps first-insertion order, which gives the column order described above.
  const columns = new Set(preferred);
  for (const record of rows) {
    for (const field of Object.keys(record)) columns.add(field);
  }
  const headers = [...columns];

  const headerLine = headers.map(csvEscape).join(",");
  const bodyLines = rows.map((record) => headers.map((name) => csvEscape(record[name])).join(","));
  const content = [headerLine, ...bodyLines].join("\n");
  fs.writeFileSync(file, `\uFEFF${content}\n`, "utf8");
}
|
||
|
||
/**
 * Load a CSV file from disk as UTF-8 and parse it with parseCsv().
 *
 * @param {string} file - Path of the CSV file.
 * @returns {{headers: string[], rows: Object[]}} Result of parseCsv().
 */
function readCsv(file) {
  const contents = fs.readFileSync(file, "utf8");
  return parseCsv(contents);
}
|
||
|
||
/**
 * Load the persisted request cache from CACHE_FILE.
 *
 * @returns {Object} Parsed cache content, or an empty object when the file does not exist.
 * @throws {SyntaxError} When the file exists but does not contain valid JSON.
 */
function loadCache() {
  return fs.existsSync(CACHE_FILE) ? JSON.parse(fs.readFileSync(CACHE_FILE, "utf8")) : {};
}
|
||
|
||
/**
 * Persist the request cache to CACHE_FILE as pretty-printed JSON.
 *
 * The content is written to a sibling temporary file and then renamed over
 * CACHE_FILE, so an interrupted run never leaves a half-written cache behind.
 * A truncated cache would otherwise make the next run fail while parsing it.
 *
 * @param {Object} cache - JSON-serialisable cache object.
 * @returns {void}
 */
function saveCache(cache) {
  fs.mkdirSync(path.dirname(CACHE_FILE), { recursive: true });
  // Same directory as the target, so the rename stays on one filesystem.
  const tmpFile = `${CACHE_FILE}.${process.pid}.tmp`;
  fs.writeFileSync(tmpFile, JSON.stringify(cache, null, 2), "utf8");
  fs.renameSync(tmpFile, CACHE_FILE);
}
|
||
|
||
/**
 * GET a URL over plain HTTP and parse the response body as JSON.
 *
 * @param {string} url - Absolute http:// URL.
 * @returns {Promise<*>} The parsed JSON body.
 * @throws {Error} (as a rejection) on connection errors, non-2xx status codes,
 *   response stream errors, or a body that is not valid JSON.
 */
function httpJson(url) {
  return new Promise((resolve, reject) => {
    const request = http.get(url, (res) => {
      const { statusCode } = res;
      if (statusCode < 200 || statusCode >= 300) {
        // Drain the body so the socket is released, then report the failure.
        res.resume();
        reject(new Error(`HTTP ${statusCode} for ${url}`));
        return;
      }
      // Decode through a streaming UTF-8 decoder; stringifying raw Buffer chunks
      // one by one corrupts multi-byte characters split across chunk boundaries.
      res.setEncoding("utf8");
      let body = "";
      res.on("data", (chunk) => {
        body += chunk;
      });
      res.on("error", reject);
      res.on("end", () => {
        try {
          resolve(JSON.parse(body));
        } catch (err) {
          reject(err);
        }
      });
    });
    request.on("error", reject);
  });
}
|
||
|
||
/**
 * Minimal request/response client for a Chrome DevTools Protocol WebSocket.
 *
 * Relies on the global `WebSocket` constructor. Each call() gets an incrementing
 * id; the promise settles when a message with the same id arrives, or rejects
 * when the socket errors or closes. Without that last rule a crashed browser
 * would leave callers awaiting forever.
 */
class Cdp {
  /**
   * @param {string} wsUrl - The target's `webSocketDebuggerUrl`.
   */
  constructor(wsUrl) {
    this.ws = new WebSocket(wsUrl);
    this.seq = 1;
    this.pending = new Map();
    this.ws.onmessage = (event) => {
      const msg = JSON.parse(event.data);
      // Messages without a known id (e.g. protocol events) are ignored.
      if (msg.id && this.pending.has(msg.id)) {
        const { resolve, reject } = this.pending.get(msg.id);
        this.pending.delete(msg.id);
        if (msg.error) reject(new Error(JSON.stringify(msg.error)));
        else resolve(msg.result);
      }
    };
    this.ws.onerror = (event) => this.#rejectPending(Cdp.#toError(event));
    this.ws.onclose = () => this.#rejectPending(new Error("CDP socket closed"));
  }

  /** Convert a WebSocket error event (or anything else) into an Error. */
  static #toError(event) {
    if (event instanceof Error) return event;
    return new Error(`CDP socket error: ${event?.message || "connection failed"}`);
  }

  /** Reject and forget every call that is still waiting for a response. */
  #rejectPending(error) {
    for (const { reject } of this.pending.values()) reject(error);
    this.pending.clear();
  }

  /**
   * Wait until the socket is open.
   *
   * @returns {Promise<void>}
   * @throws {Error} (as a rejection) when the socket errors or closes before opening.
   */
  async open() {
    // readyState 1 is OPEN; waiting for an `open` event that already fired would hang.
    if (this.ws.readyState === 1) return;
    await new Promise((resolve, reject) => {
      this.ws.onopen = () => resolve();
      this.ws.onerror = (event) => {
        const err = Cdp.#toError(event);
        reject(err);
        this.#rejectPending(err);
      };
      this.ws.onclose = () => {
        const err = new Error("CDP socket closed");
        reject(err);
        this.#rejectPending(err);
      };
    });
  }

  /**
   * Send one protocol command and resolve with its `result`.
   *
   * @param {string} method - Protocol method name, e.g. "Runtime.evaluate".
   * @param {Object} [params={}] - Method parameters.
   * @returns {Promise<*>} The `result` field of the matching response.
   * @throws {Error} (as a rejection) when the response carries `error`, the send
   *   fails, or the socket errors/closes before a response arrives.
   */
  call(method, params = {}) {
    const id = this.seq++;
    return new Promise((resolve, reject) => {
      this.pending.set(id, { resolve, reject });
      try {
        this.ws.send(JSON.stringify({ id, method, params }));
      } catch (err) {
        this.pending.delete(id);
        reject(err);
      }
    });
  }

  /** Close the underlying socket. */
  close() {
    this.ws.close();
  }
}
|
||
|
||
/**
 * Pause for a number of milliseconds.
 *
 * @param {number} ms - Delay passed to setTimeout.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
function wait(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
|
||
|
||
/**
 * Start headless Chrome with remote debugging and attach a CDP client to a tab.
 *
 * Chrome is spawned from CHROME with the PROFILE user-data dir and DEBUG_PORT,
 * opening http://localhost:8102/admin/plaza/graph. The `/json` endpoint is polled
 * (up to 40 attempts, 300 ms apart) until a debuggable target appears.
 *
 * @returns {Promise<{chrome: import("node:child_process").ChildProcess, cdp: Cdp}>}
 * @throws {Error} When Chrome cannot be started, no target appears in time, or
 *   the CDP handshake fails. Chrome is killed before the error propagates.
 */
async function launchCdp() {
  const chrome = spawn(
    CHROME,
    [
      "--headless=new",
      "--disable-gpu",
      "--no-first-run",
      "--no-default-browser-check",
      `--remote-debugging-port=${DEBUG_PORT}`,
      `--user-data-dir=${PROFILE}`,
      "http://localhost:8102/admin/plaza/graph",
    ],
    { stdio: "ignore" },
  );
  // A failed spawn (e.g. no executable at CHROME) is reported through the 'error'
  // event; without a listener it would surface as an uncaught exception.
  let spawnError = null;
  chrome.once("error", (err) => {
    spawnError = err;
  });

  // Prefer a real page target; fall back to the first entry as before.
  const pickTarget = (targets) => {
    if (!Array.isArray(targets)) return null;
    const page = targets.find((t) => t?.type === "page" && t.webSocketDebuggerUrl);
    if (page) return page;
    return targets[0]?.webSocketDebuggerUrl ? targets[0] : null;
  };

  let target = null;
  for (let attempt = 0; attempt < 40 && !spawnError; attempt += 1) {
    try {
      target = pickTarget(await httpJson(`http://127.0.0.1:${DEBUG_PORT}/json`));
      if (target) break;
    } catch {
      // The debugging endpoint is not listening yet; retry after the delay below.
    }
    await wait(300);
  }
  if (!target) {
    chrome.kill();
    if (spawnError) throw new Error(`Chrome failed to start: ${spawnError.message}`, { cause: spawnError });
    throw new Error("Chrome CDP not ready");
  }

  let cdp = null;
  try {
    cdp = new Cdp(target.webSocketDebuggerUrl);
    await cdp.open();
    await cdp.call("Runtime.enable");
  } catch (err) {
    // Do not leave an orphaned browser behind when the handshake fails.
    cdp?.close();
    chrome.kill();
    throw err;
  }
  return { chrome, cdp };
}
|
||
|
||
/**
 * Serialise a value as a JavaScript source literal for an evaluated expression.
 *
 * JSON.stringify returns `undefined` (not a string) for `undefined`, functions
 * and symbols. Joining such results with "," leaves an empty argument slot and
 * produces a syntax error, so those inputs map to the literal `undefined`.
 *
 * @param {*} v - Value to embed.
 * @returns {string} JSON text of `v`, or "undefined" when `v` has no JSON form.
 */
function jsString(v) {
  return JSON.stringify(v) ?? "undefined";
}
|
||
|
||
/**
 * Load the AMap JS API in the attached page and install the search helpers.
 *
 * Reads AMAP_JS_KEY and AMAP_SECURITY_JSCODE from the env file (TRAVEL_KG_ENV_PATH
 * or `<ROOT>/.env`), then evaluates a script in the page that:
 *   - makes sure the document has <head>/<body>,
 *   - sets window._AMapSecurityConfig and loads the AMap script with the
 *     PlaceSearch and Driving plugins (skipped when window.AMap already exists),
 *   - defines window.__znkgAmapSearch, __znkgAmapNearby and __znkgAmapDrive, each
 *     returning a promise that rejects unless AMap reports status 'complete'.
 *
 * @param {{call: Function}} cdp - CDP client connected to the page.
 * @returns {Promise<void>}
 * @throws {Error} When a key is missing or the in-page script throws.
 */
async function initAmap(cdp) {
  const envPath = process.env.TRAVEL_KG_ENV_PATH || path.join(ROOT, ".env");
  const key = readEnvKey(envPath, "AMAP_JS_KEY");
  const security = readEnvKey(envPath, "AMAP_SECURITY_JSCODE");
  if (!key || !security) throw new Error("missing AMap JS key/security");

  // encodeURIComponent leaves "'" untouched, so the URL is embedded as a JSON
  // literal instead of being spliced into a hand-written quoted string.
  const scriptUrl = `https://webapi.amap.com/maps?v=2.0&key=${encodeURIComponent(key)}&plugin=AMap.PlaceSearch,AMap.Driving`;

  const expr = `
(async () => {
  if (!document.documentElement || !document.head || !document.body) {
    document.open();
    document.write('<!doctype html><html><head><meta charset="utf-8"></head><body><div id="amap-root"></div></body></html>');
    document.close();
  }
  if (!document.head) document.documentElement.insertBefore(document.createElement('head'), document.documentElement.firstChild);
  if (!document.body) document.documentElement.appendChild(document.createElement('body'));
  window._AMapSecurityConfig = { securityJsCode: ${jsString(security)} };
  await new Promise((resolve, reject) => {
    if (window.AMap) return resolve();
    const s = document.createElement('script');
    s.src = ${jsString(scriptUrl)};
    s.onload = resolve;
    s.onerror = () => reject(new Error('amap script load failed'));
    document.head.appendChild(s);
  });
  function simplifyPoi(poi) {
    const loc = poi.location || {};
    const photos = (poi.photos || []).map((p) => p.url || p).filter(Boolean);
    return {
      id: poi.id || '',
      name: poi.name || '',
      type: poi.type || '',
      typecode: poi.typecode || '',
      address: poi.address || '',
      pname: poi.pname || poi.provinceName || '',
      cityname: poi.cityname || poi.cityName || '',
      adname: poi.adname || poi.district || '',
      adcode: poi.adcode || '',
      tel: poi.tel || '',
      distance: poi.distance || '',
      biz_ext: poi.biz_ext || {},
      rating: poi.biz_ext?.rating || poi.rating || '',
      cost: poi.biz_ext?.cost || poi.cost || '',
      photos,
      lng: loc.lng != null ? Number(loc.lng) : '',
      lat: loc.lat != null ? Number(loc.lat) : '',
    };
  }
  window.__znkgAmapSearch = (keyword, city, type, pageSize) => new Promise((resolve, reject) => {
    const ps = new AMap.PlaceSearch({ city: city || undefined, type: type || undefined, pageSize: pageSize || 20, pageIndex: 1, extensions: 'all' });
    ps.search(keyword, (status, result) => {
      if (status === 'complete') resolve((result.poiList?.pois || []).map(simplifyPoi));
      else reject(new Error(status + ':' + JSON.stringify(result)));
    });
  });
  window.__znkgAmapNearby = (keyword, lng, lat, radius, type, pageSize) => new Promise((resolve, reject) => {
    const ps = new AMap.PlaceSearch({ type: type || undefined, pageSize: pageSize || 25, pageIndex: 1, extensions: 'all' });
    ps.searchNearBy(keyword || '', [Number(lng), Number(lat)], Number(radius), (status, result) => {
      if (status === 'complete') resolve((result.poiList?.pois || []).map(simplifyPoi));
      else reject(new Error(status + ':' + JSON.stringify(result)));
    });
  });
  window.__znkgAmapDrive = (oLng, oLat, dLng, dLat) => new Promise((resolve, reject) => {
    const driving = new AMap.Driving();
    driving.search([Number(oLng), Number(oLat)], [Number(dLng), Number(dLat)], (status, result) => {
      if (status === 'complete') {
        const r = result.routes?.[0] || {};
        resolve({ distance: r.distance || '', time: r.time || '', tolls: r.tolls || '' });
      } else reject(new Error(status + ':' + JSON.stringify(result)));
    });
  });
  return 'ok';
})()
`;
  const out = await cdp.call("Runtime.evaluate", { expression: expr, awaitPromise: true, returnByValue: true });
  if (out.exceptionDetails) throw new Error(JSON.stringify(out.exceptionDetails));
}
|
||
|
||
/**
 * Call a function on `window` inside the page and return its awaited value.
 *
 * Arguments are embedded into the expression with jsString().
 *
 * @param {{call: Function}} cdp - CDP client connected to the page.
 * @param {string} name - Name of the function installed on `window`.
 * @param {Array} args - Arguments, each serialised with jsString().
 * @returns {Promise<*>} The by-value result, or undefined when none is reported.
 * @throws {Error} When the evaluation reports `exceptionDetails`.
 */
async function callPage(cdp, name, args) {
  const expr = `window.${name}(${args.map(jsString).join(",")})`;
  const out = await cdp.call("Runtime.evaluate", { expression: expr, awaitPromise: true, returnByValue: true });
  const details = out.exceptionDetails;
  if (details) {
    // `text` is only a generic prefix; the exception's own description carries the
    // actual reason. Keep its first line and drop the stack trace that follows.
    const description = details.exception?.description;
    const message = description ? String(description).split("\n")[0] : details.text || JSON.stringify(details);
    throw new Error(message);
  }
  return out.result?.value;
}
|
||
|
||
/**
 * Normalise a place name for fuzzy comparison.
 *
 * Removes a fixed list of generic scenic-area / administrative tokens (in list
 * order), drops every character that is not an ASCII letter, a digit or in the
 * range U+4E00..U+9FFF, and lower-cases the remainder.
 *
 * @param {*} value - Name to normalise; falsy values are treated as "".
 * @returns {string} Normalised name, possibly empty.
 */
function normName(value) {
  const noiseTokens = ["风景名胜区", "风景区", "旅游景区", "景区", "旅游区", "景点", "国家级", "贵州省"];
  const withoutNoise = noiseTokens.reduce((text, token) => text.replaceAll(token, ""), String(value || ""));
  return withoutNoise.replace(/[^a-zA-Z0-9\u4e00-\u9fff]/g, "").toLowerCase();
}
|
||
|
||
/**
 * Score how well a POI matches the wanted place; higher is better.
 *
 * Points: 120 for an equal normalised name, or 80 when one normalised name
 * contains the other; 25 when the POI type mentions "风景" or "景点"; 12 when the
 * POI city starts with the first two characters of `city`; 18 when the POI
 * district contains the first "/"-separated part of `district` with 县/区/市
 * removed; 5 when the POI has photos.
 *
 * @param {Object} poi - POI with optional name, type, cityname, adname, photos.
 * @param {string} targetName - Name being searched for.
 * @param {string} [city=""] - Expected city.
 * @param {string} [district=""] - Expected district, optionally "a/b".
 * @returns {number} Sum of the points listed above.
 */
function poiScore(poi, targetName, city = "", district = "") {
  const wanted = normName(targetName);
  const found = normName(poi.name);
  const poiType = String(poi.type || "");
  const districtStem = String(district || "").split("/")[0].replace(/[县区市]/g, "");

  let nameScore = 0;
  if (wanted && found) {
    if (wanted === found) nameScore = 120;
    else if (found.includes(wanted) || wanted.includes(found)) nameScore = 80;
  }

  const parts = [
    nameScore,
    poiType.includes("风景") || poiType.includes("景点") ? 25 : 0,
    city && String(poi.cityname || "").startsWith(city.slice(0, 2)) ? 12 : 0,
    districtStem && String(poi.adname || "").includes(districtStem) ? 18 : 0,
    poi.photos?.length ? 5 : 0,
  ];
  return parts.reduce((sum, points) => sum + points, 0);
}
|
||
|
||
/**
 * Pick the highest-scoring POI according to poiScore().
 *
 * When several POIs share the top score, the one that comes first in `pois` wins.
 *
 * @param {Object[]|null|undefined} pois - Candidate POIs.
 * @param {string} targetName - Name being searched for.
 * @param {string} [city=""] - Expected city.
 * @param {string} [district=""] - Expected district.
 * @returns {Object|null} The best candidate, or null when there are none.
 */
function bestPoi(pois, targetName, city = "", district = "") {
  if (!pois?.length) return null;
  let winner = null;
  let topScore = -Infinity;
  for (const candidate of pois) {
    const score = poiScore(candidate, targetName, city, district);
    // Strictly greater, so earlier candidates keep the lead on ties.
    if (winner === null || score > topScore) {
      winner = candidate;
      topScore = score;
    }
  }
  return winner;
}
|
||
|
||
/**
 * Build an AMap marker URI for a coordinate.
 *
 * @param {number|string} lng - Longitude.
 * @param {number|string} lat - Latitude.
 * @param {string} [name] - Marker label; falsy values become "".
 * @returns {string} The marker URL, or "" when either coordinate is falsy.
 */
function markerUrl(lng, lat, name) {
  if (!(lng && lat)) return "";
  const query = new URLSearchParams();
  query.set("position", `${lng},${lat}`);
  query.set("name", name || "");
  query.set("src", "znkg");
  query.set("coordinate", "gaode");
  query.set("callnative", "0");
  return `https://uri.amap.com/marker?${query.toString()}`;
}
|
||
|
||
/**
 * Map a simplified AMap POI onto the CSV column set shared by all outputs.
 *
 * Falsy source values become "". Photos are exposed as the first URL, a
 * "|"-joined list and a count. `amap_url` comes from markerUrl().
 *
 * @param {Object} poi - Simplified POI (id, name, type, lng, lat, photos, ...).
 * @returns {Object} Flat record keyed by output column name.
 */
function commonFields(poi) {
  const { id, name, type, typecode, pname, cityname, adname, adcode, address, lng, lat, tel, rating, cost } = poi;
  const gallery = poi.photos || [];
  const orBlank = (value) => value || "";
  return {
    amap_name: orBlank(name),
    amap_poi_id: orBlank(id),
    amap_type: orBlank(type),
    amap_typecode: orBlank(typecode),
    province: orBlank(pname),
    city: orBlank(cityname),
    district: orBlank(adname),
    adcode: orBlank(adcode),
    formatted_address: orBlank(address),
    geo_lng: orBlank(lng),
    geo_lat: orBlank(lat),
    tel: orBlank(tel),
    amap_rating: orBlank(rating),
    amap_avg_cost: orBlank(cost),
    first_image_url: orBlank(gallery[0]),
    all_image_urls: gallery.join("|"),
    image_count: gallery.length,
    amap_url: markerUrl(lng, lat, name),
  };
}
|
||
|
||
/**
 * Return the cached value for `key`, or produce, store and return a new one.
 *
 * A truthy cached entry is returned at once. Otherwise `producer` is awaited,
 * its result stored, the cache saved whenever its entry count is a multiple of
 * 20, and the call pauses 120 ms before returning.
 *
 * @param {Object} cache - Mutable cache object keyed by request signature.
 * @param {string} key - Cache key.
 * @param {() => Promise<*>|*} producer - Computes the value on a miss.
 * @returns {Promise<*>} The cached or freshly produced value.
 */
async function cached(cache, key, producer) {
  const hit = cache[key];
  if (hit) return hit;

  const fresh = await producer();
  cache[key] = fresh;

  const entryCount = Object.keys(cache).length;
  if (entryCount % 20 === 0) saveCache(cache);

  await wait(120);
  return fresh;
}
|
||
|
||
/**
 * Cached keyword search through the page's `__znkgAmapSearch` helper.
 *
 * @param {Object} cdp - CDP client connected to the page.
 * @param {Object} cache - Request cache.
 * @param {string} keyword - Search text.
 * @param {string} [city=""] - City restriction.
 * @param {string} [type=""] - POI type restriction.
 * @param {number} [pageSize=20] - Maximum number of results requested.
 * @returns {Promise<Object[]>} Value produced by the page helper (or the cached copy).
 */
async function searchText(cdp, cache, keyword, city = "", type = "", pageSize = 20) {
  const cacheKey = `search|${keyword}|${city}|${type}|${pageSize}`;
  const runSearch = () => callPage(cdp, "__znkgAmapSearch", [keyword, city, type, pageSize]);
  return cached(cache, cacheKey, runSearch);
}
|
||
|
||
/**
 * Cached around-a-point search through the page's `__znkgAmapNearby` helper.
 *
 * @param {Object} cdp - CDP client connected to the page.
 * @param {Object} cache - Request cache.
 * @param {string} keyword - Search text.
 * @param {number|string} lng - Centre longitude.
 * @param {number|string} lat - Centre latitude.
 * @param {number} radius - Search radius passed to the page helper.
 * @param {string} [type=""] - POI type restriction.
 * @param {number} [pageSize=25] - Maximum number of results requested.
 * @returns {Promise<Object[]>} Value produced by the page helper (or the cached copy).
 */
async function searchNearby(cdp, cache, keyword, lng, lat, radius, type = "", pageSize = 25) {
  const cacheKey = `nearby|${keyword}|${lng}|${lat}|${radius}|${type}|${pageSize}`;
  const runSearch = () => callPage(cdp, "__znkgAmapNearby", [keyword, lng, lat, radius, type, pageSize]);
  return cached(cache, cacheKey, runSearch);
}
|
||
|
||
/**
 * Cached driving-route lookup through the page's `__znkgAmapDrive` helper.
 *
 * A failed lookup does not reject: the failure is turned into `{ error }` and,
 * like any other value, goes through cached().
 *
 * @param {Object} cdp - CDP client connected to the page.
 * @param {Object} cache - Request cache.
 * @param {number|string} oLng - Origin longitude.
 * @param {number|string} oLat - Origin latitude.
 * @param {number|string} dLng - Destination longitude.
 * @param {number|string} dLat - Destination latitude.
 * @returns {Promise<Object>} The helper's result, or `{ error: string }`.
 */
async function driving(cdp, cache, oLng, oLat, dLng, dLat) {
  const routeKey = `drive|${oLng},${oLat}|${dLng},${dLat}`;

  async function fetchRoute() {
    let route;
    try {
      route = await callPage(cdp, "__znkgAmapDrive", [oLng, oLat, dLng, dLat]);
    } catch (failure) {
      return { error: String(failure.message || failure) };
    }
    return route;
  }

  return cached(cache, routeKey, fetchRoute);
}
|
||
|
||
/**
 * Match every scenic row against AMap and write scenic_for_amap_enriched.csv.
 *
 * For each row the keywords `name`, `amap_search_keyword` and "`city` `name`"
 * are tried in that order (trimmed, duplicates removed) until bestPoi() returns
 * a candidate. A search that rejects is logged and treated as "no result for
 * this keyword", so one failing request cannot abort the whole run.
 *
 * Matched rows receive the commonFields() columns plus has_geo,
 * amap_match_status "matched" and amap_match_score. Existing image columns are
 * kept when the POI has no photo. Unmatched rows get status "not_found".
 *
 * @param {Object} cdp - CDP client connected to the page.
 * @param {Object} cache - Request cache.
 * @param {Object[]} scenicRows - Parsed scenic CSV rows.
 * @param {string[]} scenicHeaders - Original header order, kept first in the output.
 * @returns {Promise<Object[]>} The enriched rows, in input order.
 */
async function enrichScenic(cdp, cache, scenicRows, scenicHeaders) {
  const out = [];
  for (let i = 0; i < scenicRows.length; i += 1) {
    const row = scenicRows[i];
    const name = row.name || "";
    const city = row.city || "";
    const district = row.district || "";
    const keywords = [
      ...new Set([name, row.amap_search_keyword, `${city} ${name}`].map((k) => String(k || "").trim()).filter(Boolean)),
    ];

    let selected = null;
    for (const keyword of keywords) {
      let pois;
      try {
        pois = await searchText(cdp, cache, keyword, city, "", 20);
      } catch (err) {
        console.warn(`[scenic] search failed for "${keyword}": ${err?.message || err}`);
        continue;
      }
      selected = bestPoi(pois, name, city, district);
      if (selected) break;
    }

    const next = { ...row };
    if (selected) {
      const common = commonFields(selected);
      Object.assign(next, common, {
        has_geo: Boolean(common.geo_lng && common.geo_lat),
        amap_match_status: "matched",
        amap_match_score: poiScore(selected, name, city, district),
      });
      // Keep the source row's images when AMap has none for this POI.
      if (row.first_image_url && !common.first_image_url) {
        next.first_image_url = row.first_image_url;
        next.all_image_urls = row.all_image_urls || "";
        next.image_count = row.image_count || "";
      }
    } else {
      next.amap_match_status = "not_found";
    }
    out.push(next);
    console.log(`[scenic] ${i + 1}/${scenicRows.length} ${name} -> ${next.amap_name || ""} ${next.geo_lng || ""},${next.geo_lat || ""}`);
  }
  writeCsv(path.join(OUT_DIR, "scenic_for_amap_enriched.csv"), out, scenicHeaders);
  return out;
}
|
||
|
||
/**
 * Match existing hotel/restaurant rows against AMap by keyword search.
 *
 * The keyword is `amap_search_keyword` or "`city` `name`". The search first runs
 * restricted to `typeKeyword`; if that rejects it is retried without a type. If
 * the retry rejects too, the row is reported as "not_found" instead of aborting
 * the run.
 *
 * Matched rows receive the commonFields() columns, `<prefix>_id` built from the
 * POI id, an extended `source`, status "matched" and a score. Unmatched rows get
 * `<prefix>_id` of the form `<PREFIX>_UNMATCHED_0001` and status "not_found".
 *
 * @param {Object} cdp - CDP client connected to the page.
 * @param {Object} cache - Request cache.
 * @param {Object[]} rows - Parsed source rows.
 * @param {string[]} headers - Source header order (not used by this function).
 * @param {string} nameField - Column holding the resource name.
 * @param {string} idPrefix - Upper-case id prefix, e.g. "HOTEL".
 * @param {string} typeKeyword - AMap type used for the first search attempt.
 * @returns {Promise<Object[]>} The enriched rows, in input order.
 */
async function enrichExisting(cdp, cache, rows, headers, nameField, idPrefix, typeKeyword) {
  const idColumn = `${idPrefix.toLowerCase()}_id`;
  const out = [];
  for (let i = 0; i < rows.length; i += 1) {
    const row = rows[i];
    const name = row[nameField] || "";
    const city = row.city || row.expected_city || "";
    const district = row.district || "";
    const keyword = row.amap_search_keyword || `${city} ${name}`;

    let selected = null;
    let lastError = null;
    // Typed search first; an untyped retry only when the typed one rejects.
    for (const type of [typeKeyword, ""]) {
      try {
        selected = bestPoi(await searchText(cdp, cache, keyword, city, type, 20), name, city, district);
        lastError = null;
        break;
      } catch (err) {
        lastError = err;
      }
    }
    if (lastError) {
      console.warn(`[${idPrefix.toLowerCase()}] search failed for "${keyword}": ${lastError?.message || lastError}`);
    }

    const next = { ...row };
    if (selected) {
      Object.assign(next, commonFields(selected), {
        [idColumn]: `${idPrefix}_${selected.id || String(i + 1).padStart(4, "0")}`,
        source: `${row.source || "source_csv"}+amap_js_text`,
        amap_match_status: "matched",
        amap_match_score: poiScore(selected, name, city, district),
      });
    } else {
      Object.assign(next, {
        [idColumn]: `${idPrefix}_UNMATCHED_${String(i + 1).padStart(4, "0")}`,
        amap_match_status: "not_found",
      });
    }
    out.push(next);
    if ((i + 1) % 10 === 0 || i + 1 === rows.length) console.log(`[${idPrefix.toLowerCase()}] ${i + 1}/${rows.length}`);
  }
  return out;
}
|
||
|
||
/**
 * Drop rows whose key was already seen, keeping the first occurrence.
 *
 * @param {Iterable<*>} rows - Items to de-duplicate.
 * @param {(row: *) => *} keyFn - Computes the identity key of a row.
 * @returns {Array<*>} First row for every distinct key, in original order.
 */
function uniqueBy(rows, keyFn) {
  const firstByKey = new Map();
  for (const row of rows) {
    const key = keyFn(row);
    if (!firstByKey.has(key)) firstByKey.set(key, row);
  }
  return [...firstByKey.values()];
}
|
||
|
||
/**
 * Collect up to `limit` distinct POIs around a point, widening the radius step by step.
 *
 * Each radius is searched with `type` first and, if that rejects, again without
 * a type. A radius whose retry also rejects contributes nothing and is logged.
 */
async function collectNearbyPois(cdp, cache, { keyword, type, lng, lat, radii, limit }) {
  const picked = [];
  const seen = new Set();
  for (const radius of radii) {
    let pois = [];
    try {
      pois = await searchNearby(cdp, cache, keyword, lng, lat, radius, type, 25);
    } catch {
      try {
        pois = await searchNearby(cdp, cache, keyword, lng, lat, radius, "", 25);
      } catch (err) {
        console.warn(`[nearby] search failed at ${lng},${lat} r=${radius}: ${err?.message || err}`);
        pois = [];
      }
    }
    for (const poi of pois || []) {
      // POIs without an id are identified by name and position.
      const id = poi.id || `${poi.name}-${poi.lng}-${poi.lat}`;
      if (seen.has(id)) continue;
      seen.add(id);
      picked.push(poi);
      if (picked.length >= limit) return picked;
    }
  }
  return picked;
}

/**
 * Find up to 10 nearby hotels or restaurants per scenic spot, with driving metrics.
 *
 * Scenic rows without coordinates yield no candidates. For every candidate a
 * master record and a scenic -> resource relation are produced. The relation
 * carries the driving distance and time from driving().
 *
 * `drive_status` is "OK" when a route was returned, the error text when the
 * lookup failed, and "NO_COORDS" when the POI has no coordinates so no route was
 * requested (this case used to be reported as "OK").
 *
 * @param {Object} cdp - CDP client connected to the page.
 * @param {Object} cache - Request cache.
 * @param {Object[]} scenicRows - Enriched scenic rows (id, name, geo_lng, geo_lat).
 * @param {string} kind - "hotel" selects hotels; any other value selects restaurants.
 * @returns {Promise<{master: Object[], rels: Object[]}>} De-duplicated master
 *   records and all relations.
 */
async function nearby(cdp, cache, scenicRows, kind) {
  const isHotel = kind === "hotel";
  const keyword = isHotel ? "酒店" : "餐厅";
  const type = isHotel ? "住宿服务" : "餐饮服务";
  const resourceType = isHotel ? "Hotel" : "Restaurant";
  const radii = [5000, 10000, 20000, 50000];
  const limit = 10;
  const master = [];
  const rels = [];

  for (let i = 0; i < scenicRows.length; i += 1) {
    const s = scenicRows[i];
    const lng = s.geo_lng;
    const lat = s.geo_lat;
    const selected = lng && lat ? await collectNearbyPois(cdp, cache, { keyword, type, lng, lat, radii, limit }) : [];

    for (let rank = 0; rank < selected.length; rank += 1) {
      const poi = selected[rank];
      const common = commonFields(poi);
      const resourceId = `${resourceType.toUpperCase()}_${poi.id || `${s.id}_${rank + 1}`}`;
      master.push({
        [`${kind}_id`]: resourceId,
        [`${kind}_name`]: poi.name || "",
        source: "amap_js_around",
        ...common,
      });

      const hasCoords = Boolean(common.geo_lng && common.geo_lat);
      const drive = hasCoords ? (await driving(cdp, cache, lng, lat, common.geo_lng, common.geo_lat)) || {} : {};
      let driveStatus = "OK";
      if (!hasCoords) driveStatus = "NO_COORDS";
      else if (drive.error) driveStatus = drive.error;

      rels.push({
        scenic_id: s.id || "",
        scenic_name: s.name || "",
        scenic_lng: lng || "",
        scenic_lat: lat || "",
        resource_type: resourceType,
        resource_id: resourceId,
        resource_name: poi.name || "",
        amap_poi_id: poi.id || "",
        resource_lng: common.geo_lng || "",
        resource_lat: common.geo_lat || "",
        rank_for_scenic: rank + 1,
        amap_around_distance_m: poi.distance || "",
        drive_status: driveStatus,
        drive_distance_m: drive.distance || "",
        drive_distance_km: drive.distance ? Math.round((Number(drive.distance) / 1000) * 100) / 100 : "",
        drive_duration_s: drive.time || "",
        drive_duration_min: drive.time ? Math.round((Number(drive.time) / 60) * 10) / 10 : "",
        drive_tolls: drive.tolls || "",
        province: common.province || "",
        city: common.city || "",
        district: common.district || "",
        formatted_address: common.formatted_address || "",
        amap_type: common.amap_type || "",
        tel: common.tel || "",
        first_image_url: common.first_image_url || "",
        all_image_urls: common.all_image_urls || "",
        amap_url: common.amap_url || "",
      });
    }
    console.log(`[nearby:${kind}] ${i + 1}/${scenicRows.length} ${s.name} -> ${selected.length}`);
  }
  return { master: uniqueBy(master, (r) => r.amap_poi_id || r[`${kind}_name`]), rels };
}
|
||
|
||
/**
 * Annotate master rows with the scenic spot that has the shortest drive time.
 *
 * Relations are grouped by `amap_poi_id`; per POI the relation with the smallest
 * `drive_duration_min` wins (a missing duration counts as 999999, earlier
 * relations win ties). Rows and relations without an `amap_poi_id` are never
 * paired: they would all share the empty key, so unrelated id-less POIs would
 * inherit each other's nearest scenic spot.
 *
 * @param {Object[]} masterRows - POI master records.
 * @param {Object[]} relRows - Scenic -> resource relations.
 * @param {string} kind - Resource kind; accepted for call compatibility, unused.
 * @returns {Object[]} New array; matched rows are copies with nearest_scenic_id,
 *   nearest_scenic_name, nearest_drive_distance_km and nearest_drive_duration_min
 *   added, other rows are returned as-is.
 */
function addNearestToMaster(masterRows, relRows, kind) {
  const durationOf = (rel) => Number(rel.drive_duration_min || 999999);

  const best = new Map();
  for (const rel of relRows) {
    const key = rel.amap_poi_id;
    if (!key) continue;
    const cur = best.get(key);
    if (!cur || durationOf(rel) < durationOf(cur)) best.set(key, rel);
  }

  return masterRows.map((row) => {
    const rel = row.amap_poi_id ? best.get(row.amap_poi_id) : undefined;
    if (!rel) return row;
    return {
      ...row,
      nearest_scenic_id: rel.scenic_id,
      nearest_scenic_name: rel.scenic_name,
      nearest_drive_distance_km: rel.drive_distance_km,
      nearest_drive_duration_min: rel.drive_duration_min,
    };
  });
}
|
||
|
||
/**
 * Write the static field dictionary (Markdown) into OUT_DIR as "字段字典.md".
 *
 * The document lists the generated files, describes the key output columns and
 * ends with a short knowledge-graph modelling note.
 *
 * @returns {void}
 */
function writeDictionary() {
  const lines = [
    "# 高德 JS API POI 补全字段字典",
    "",
    "## 本次输出文件",
    "",
    "- scenic_for_amap_enriched.csv:景区 POI 补全结果。",
    "- hotel_poi_enriched.csv:原酒店表匹配高德 POI 后的结果。",
    "- restaurant_poi_enriched.csv:原餐饮表匹配高德 POI 后的结果。",
    "- hotel_poi_amap_master.csv / restaurant_poi_amap_master.csv:原始资源 + 高德附近资源合并后的 POI 主表。",
    "- scenic_hotel_nearby_10.csv / scenic_restaurant_nearby_10.csv:每个景区附近 10 个酒店/餐饮候选及驾车距离时间。",
    "- scenic_resource_drive_metrics.csv:酒店与餐饮 nearby 关系合并表。",
    "",
    "## 关键字段",
    "",
    "| 字段 | 说明 |",
    "|---|---|",
    "| amap_poi_id | 高德 POI ID,外部来源唯一标识 |",
    "| amap_name | 高德返回名称 |",
    "| amap_type / amap_typecode | 高德行业分类 |",
    "| province / city / district / adcode | 高德行政区 |",
    "| formatted_address | 高德地址 |",
    "| geo_lng / geo_lat | 高德 GCJ-02 坐标 |",
    "| tel | 高德电话 |",
    "| first_image_url / all_image_urls | 高德照片 URL |",
    "| amap_url | 高德 marker URI,可用于前端跳转 |",
    "| drive_distance_km | 景区到资源的高德驾车公里数 |",
    "| drive_duration_min | 景区到资源的高德驾车分钟数 |",
    "| rank_for_scenic | 某景区附近资源排序 |",
    "| nearest_scenic_name | 主表中该资源离哪个景区车程最近 |",
    "",
    "## 图谱建议",
    "",
    "酒店和餐饮作为独立 POI 实体;景区到酒店/餐饮使用 NEARBY 关系,关系属性写入 drive_distance_km、drive_duration_min、rank_for_scenic。门票、小交通、保险等仍作为 TravelItem 绑定景区。",
  ];
  const target = path.join(OUT_DIR, "字段字典.md");
  fs.writeFileSync(target, `${lines.join("\n")}\n`, "utf8");
}
|
||
|
||
/**
 * Write the Markdown summary report into OUT_DIR as "高德补全报告.md".
 *
 * The report holds match totals for the three enriched tables, relation counts,
 * and one table row per scenic spot with its number of hotel and restaurant
 * candidates. Every scenic row is listed, including those with no candidate at
 * all. Those are the coverage gaps the table exists to expose, and they used to
 * be left out because the table was built from relations only.
 *
 * A row counts as matched when its status is "matched" or it has an amap_poi_id.
 *
 * @param {Object} data
 * @param {Object[]} data.scenicRows - Enriched scenic rows.
 * @param {Object[]} data.hotelRows - Enriched source hotel rows.
 * @param {Object[]} data.restaurantRows - Enriched source restaurant rows.
 * @param {Object[]} data.hotelRels - Scenic -> hotel relations.
 * @param {Object[]} data.restaurantRels - Scenic -> restaurant relations.
 * @returns {void}
 */
function writeReport({ scenicRows, hotelRows, restaurantRows, hotelRels, restaurantRels }) {
  const matched = (rows) => rows.filter((r) => r.amap_match_status === "matched" || r.amap_poi_id).length;

  const byScenic = new Map();
  const counterFor = (name) => {
    if (!byScenic.has(name)) byScenic.set(name, { hotel: 0, restaurant: 0 });
    return byScenic.get(name);
  };
  // Seed with every scenic spot so zero-coverage rows stay visible. Relations use
  // `name || ""` as scenic_name, so the same key is used here.
  for (const s of scenicRows) counterFor(s.name || "");
  for (const r of hotelRels) counterFor(r.scenic_name).hotel += 1;
  for (const r of restaurantRels) counterFor(r.scenic_name).restaurant += 1;

  const lines = [
    "# 高德 JS API 补全报告",
    "",
    fLine("景区补全", matched(scenicRows), scenicRows.length),
    fLine("原酒店 POI 匹配", matched(hotelRows), hotelRows.length),
    fLine("原餐饮 POI 匹配", matched(restaurantRows), restaurantRows.length),
    `- 景区附近酒店关系:${hotelRels.length} 条`,
    `- 景区附近餐饮关系:${restaurantRels.length} 条`,
    "",
    "## 每个景区 nearby 覆盖",
    "",
    "| 景区 | 酒店候选 | 餐饮候选 |",
    "|---|---:|---:|",
  ];
  const sorted = [...byScenic.entries()].sort((a, b) => a[0].localeCompare(b[0], "zh-Hans-CN"));
  for (const [name, v] of sorted) {
    lines.push(`| ${name} | ${v.hotel || 0} | ${v.restaurant || 0} |`);
  }
  fs.writeFileSync(path.join(OUT_DIR, "高德补全报告.md"), `${lines.join("\n")}\n`, "utf8");
}
|
||
|
||
/**
 * Format one "label: n/total" bullet line for the Markdown report.
 *
 * @param {string} label - Text shown before the ratio.
 * @param {number} n - Numerator.
 * @param {number} total - Denominator.
 * @returns {string} A Markdown list item.
 */
function fLine(label, n, total) {
  const ratio = `${n}/${total}`;
  return `- ${label}:${ratio}`;
}
|
||
|
||
/**
 * Run the whole enrichment pipeline.
 *
 * Steps: read the three source CSVs, start Chrome and the AMap page helpers,
 * enrich scenic / hotel / restaurant rows, collect nearby hotels and restaurants
 * per scenic spot, build the master tables, then write all CSV and Markdown
 * outputs into OUT_DIR.
 *
 * The cache is saved and the browser is shut down in `finally`, so results
 * fetched before a failure are kept for the next run.
 *
 * @returns {Promise<void>}
 */
async function main() {
  fs.mkdirSync(OUT_DIR, { recursive: true });
  const cache = loadCache();
  const { headers: scenicHeaders, rows: scenicRaw } = readCsv(SCENIC_FILE);
  const { headers: hotelHeaders, rows: hotelRaw } = readCsv(HOTEL_FILE);
  const { headers: restaurantHeaders, rows: restaurantRaw } = readCsv(RESTAURANT_FILE);

  const { chrome, cdp } = await launchCdp();
  try {
    await initAmap(cdp);
    const scenicRows = await enrichScenic(cdp, cache, scenicRaw, scenicHeaders);
    saveCache(cache);

    const hotelRows = await enrichExisting(cdp, cache, hotelRaw, hotelHeaders, "hotel_name", "HOTEL", "住宿服务");
    writeCsv(path.join(OUT_DIR, "hotel_poi_enriched.csv"), hotelRows, hotelHeaders);
    saveCache(cache);

    const restaurantRows = await enrichExisting(cdp, cache, restaurantRaw, restaurantHeaders, "restaurant_name", "RESTAURANT", "餐饮服务");
    writeCsv(path.join(OUT_DIR, "restaurant_poi_enriched.csv"), restaurantRows, restaurantHeaders);
    saveCache(cache);

    const nearbyHotels = await nearby(cdp, cache, scenicRows, "hotel");
    saveCache(cache);
    const nearbyRestaurants = await nearby(cdp, cache, scenicRows, "restaurant");
    saveCache(cache);

    // Dedupe key: POI id, then name, then the generated row id. The last fallback
    // keeps rows that have neither id nor name from collapsing into one.
    const hotelMaster = addNearestToMaster(
      uniqueBy([...hotelRows, ...nearbyHotels.master], (r) => r.amap_poi_id || r.hotel_name || r.hotel_id),
      nearbyHotels.rels,
      "hotel",
    );
    const restaurantMaster = addNearestToMaster(
      uniqueBy([...restaurantRows, ...nearbyRestaurants.master], (r) => r.amap_poi_id || r.restaurant_name || r.restaurant_id),
      nearbyRestaurants.rels,
      "restaurant",
    );

    writeCsv(path.join(OUT_DIR, "hotel_poi_amap_master.csv"), hotelMaster);
    writeCsv(path.join(OUT_DIR, "restaurant_poi_amap_master.csv"), restaurantMaster);
    writeCsv(path.join(OUT_DIR, "scenic_hotel_nearby_10.csv"), nearbyHotels.rels);
    writeCsv(path.join(OUT_DIR, "scenic_restaurant_nearby_10.csv"), nearbyRestaurants.rels);
    writeCsv(path.join(OUT_DIR, "scenic_resource_drive_metrics.csv"), [...nearbyHotels.rels, ...nearbyRestaurants.rels]);
    writeDictionary();
    writeReport({ scenicRows, hotelRows, restaurantRows, hotelRels: nearbyHotels.rels, restaurantRels: nearbyRestaurants.rels });
    console.log(`done ${OUT_DIR}`);
  } finally {
    try {
      // Keep whatever was fetched, even when a stage above threw.
      saveCache(cache);
      cdp.close();
    } finally {
      // Always terminate the browser, even if saving or closing failed.
      chrome.kill();
    }
  }
}
|
||
|
||
// Script entry point: run the pipeline; on failure print the error and exit with status 1.
const exitWithFailure = (failure) => {
  console.error(failure);
  process.exit(1);
};

main().catch(exitWithFailure);
|