"""高德(Amap) connector for the Super Agent. Reuses the user's existing crawler functions (search_poi / format_fields) from the external project, WITHOUT image download / CSV / JSON side effects (those live only in crawl()/__main__, not in the functions we import). Deterministic, legal, structured — the cheap "fetch" tool the Super Agent calls; the expensive LLM is only used for planning, not here. """ from __future__ import annotations import importlib.util import os from pathlib import Path from typing import Any import requests import urllib3 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) _PROJECT_ROOT = Path(__file__).resolve().parents[2] _CRAWL_PATH = os.getenv("GAODE_CRAWLER_PATH", str(_PROJECT_ROOT / "scripts" / "crawl_guiyan.py")) _mod: Any = None # 高德官方一级 POI 类型编码(按 type code 网格扫描,不靠热度关键词) AMAP_TYPECODES = { "景点": "110000", # 风景名胜 "美食": "050000", # 餐饮服务 "酒店": "100000", # 住宿服务 "商场": "060000", # 购物服务 } def _crawl(): global _mod if _mod is not None: return _mod if not os.path.exists(_CRAWL_PATH): raise FileNotFoundError(f"高德采集脚本不存在: {_CRAWL_PATH}") spec = importlib.util.spec_from_file_location("crawl_guiyan", _CRAWL_PATH) m = importlib.util.module_from_spec(spec) spec.loader.exec_module(m) # top-level = defs/CONFIG only (guarded __main__) _mod = m return m def available_types() -> list[str]: try: return list(_crawl().SEARCH_KEYWORDS.keys()) except Exception: return ["景点", "美食", "酒店", "商场"] def _normalize(f: dict) -> dict: photos = [u for u in (f.get("门店图片") or "").split(" | ") if u] return { "gaode_poi_id": f.get("高德POI_ID", ""), "name": f.get("名称", ""), "type": f.get("类型", ""), "typecode": f.get("类型编码", ""), "lng": f.get("经度", ""), "lat": f.get("纬度", ""), "address": f.get("详细地址", ""), "province": f.get("省份", ""), "city": f.get("城市", ""), "district": f.get("区县", ""), "business_area": f.get("商圈", ""), "tel": f.get("联系电话", ""), "open_time": f.get("营业时间", ""), "rating": f.get("评分", ""), "cost": f.get("人均消费", ""), "level": f.get("等级", ""), "tags": f.get("标签", ""), "photo_urls": photos, # URL only, no download "source": "amap", "keyword": f.get("采集关键词", ""), } def search_pois( poi_type: str | None = None, keyword: str | None = None, max_pages: int = 2, limit: int = 60, ) -> list[dict]: """Fetch + normalize POIs from 高德. No file/image side effects. Only rows with valid lng/lat are returned (KG requires coordinates). """ m = _crawl() if keyword: kws = [keyword] elif poi_type: kws = m.SEARCH_KEYWORDS.get(poi_type, [poi_type]) else: kws = [] out: list[dict] = [] seen: set[str] = set() for kw in kws: for page in range(1, max_pages + 1): res = m.search_poi(kw, page) if not res or res.get("status") != "1": break pois = res.get("pois") or [] if not pois: break for p in pois: try: f = m.format_fields(p, len(out) + 1, kw) except Exception: continue pid = f.get("高德POI_ID") if not pid or pid in seen: continue n = _normalize(f) if not (n["lng"] and n["lat"]): continue seen.add(pid) out.append(n) if len(out) >= limit: return out return out def _amap_key() -> str: return _crawl().CONFIG["key"] def search_polygon( typecode: str, bbox: tuple[float, float, float, float], page: int = 1, offset: int = 25, ) -> tuple[list[dict], int]: """高德矩形(多边形)搜索 —— 按地理网格系统扫全城,非热度排序。 bbox = (min_lng, min_lat, max_lng, max_lat) 返回 (归一化后的行, 本页高德原始返回条数)。 原始条数 < offset 视为该网格该类已扫尽。 """ mnlng, mnlat, mxlng, mxlat = bbox # 多边形矩形:左上 | 右下 polygon = f"{mnlng:.6f},{mxlat:.6f}|{mxlng:.6f},{mnlat:.6f}" params = { "key": _amap_key(), "polygon": polygon, "types": typecode, "offset": offset, "page": page, "extensions": "all", "output": "json", } headers = {"User-Agent": "Mozilla/5.0"} for attempt in range(3): try: r = requests.get( "https://restapi.amap.com/v3/place/polygon", params=params, headers=headers, timeout=30, verify=False) j = r.json() break except Exception: if attempt == 2: return [], 0 continue if j.get("status") != "1": return [], 0 pois = j.get("pois") or [] m = _crawl() out: list[dict] = [] for i, p in enumerate(pois): try: f = m.format_fields(p, i + 1, "grid") except Exception: continue n = _normalize(f) if n["lng"] and n["lat"]: out.append(n) return out, len(pois)