Initial travel knowledge graph release
This commit is contained in:
177
app/agents/gaode_connector.py
Normal file
177
app/agents/gaode_connector.py
Normal file
@@ -0,0 +1,177 @@
|
||||
"""高德(Amap) connector for the Super Agent.
|
||||
|
||||
Reuses the user's existing crawler functions (search_poi / format_fields)
|
||||
from the external project, WITHOUT image download / CSV / JSON side effects
|
||||
(those live only in crawl()/__main__, not in the functions we import).
|
||||
|
||||
Deterministic, legal, structured — the cheap "fetch" tool the Super Agent
|
||||
calls; the expensive LLM is only used for planning, not here.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib.util
|
||||
import os
|
||||
from typing import Any
|
||||
|
||||
import requests
|
||||
import urllib3
|
||||
|
||||
# Silence urllib3's InsecureRequestWarning for the whole process: search_polygon
# in this module issues its HTTPS request with verify=False.
# NOTE(review): this also hides the warning for every other urllib3 user in the
# process -- confirm that is intended.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
# Location of the external crawler script that _crawl() loads as a module.
# Set the GAODE_CRAWL_PATH environment variable to point at the script on
# machines other than the original developer's; an unset or empty variable
# falls back to the original hard-coded path.
_CRAWL_PATH = os.environ.get("GAODE_CRAWL_PATH") or (
    "/Users/xuexue/PycharmProjects/PythonProject/xuexue-CityGraph/crawl_guiyan.py"
)
# Cache for the lazily loaded crawler module (filled in by _crawl()).
_mod: Any = None

# Official Amap top-level POI type codes (for scanning a grid by type code
# rather than relying on popularity-ranked keywords).
AMAP_TYPECODES = {
    "景点": "110000",  # scenic spots / places of interest
    "美食": "050000",  # food and beverage services
    "酒店": "100000",  # accommodation services
    "商场": "060000",  # shopping services
}
|
||||
|
||||
|
||||
def _crawl():
    """Load the external crawler script as a module (once) and return it.

    The loaded module is cached in the module-level ``_mod``, so the script's
    top level is executed at most once per process.

    Returns:
        The module object built from the file at ``_CRAWL_PATH``.

    Raises:
        FileNotFoundError: ``_CRAWL_PATH`` does not exist.
        ImportError: no import spec or loader could be built for the file.
    """
    global _mod
    if _mod is not None:
        return _mod
    if not os.path.exists(_CRAWL_PATH):
        raise FileNotFoundError(f"高德采集脚本不存在: {_CRAWL_PATH}")
    spec = importlib.util.spec_from_file_location("crawl_guiyan", _CRAWL_PATH)
    # spec_from_file_location returns None when it cannot choose a loader for
    # the path; fail with a clear error instead of an AttributeError below.
    if spec is None or spec.loader is None:
        raise ImportError(f"cannot build an import spec for {_CRAWL_PATH}")
    module = importlib.util.module_from_spec(spec)
    # Runs the script's top level. Per the original author's note this is only
    # defs/CONFIG, because the script's entry point is guarded by __main__.
    spec.loader.exec_module(module)
    _mod = module  # cache only after the script executed successfully
    return module
|
||||
|
||||
|
||||
def available_types() -> list[str]:
    """Return the POI type names the connector can search for.

    The names are the keys of the crawler module's ``SEARCH_KEYWORDS``. If the
    crawler cannot be loaded or read for any reason, a built-in list of four
    type names is returned instead.
    """
    try:
        keyword_table = _crawl().SEARCH_KEYWORDS
        type_names = list(keyword_table.keys())
    except Exception:
        # Best-effort: any failure degrades to the built-in defaults.
        return ["景点", "美食", "酒店", "商场"]
    else:
        return type_names
|
||||
|
||||
|
||||
def _normalize(f: dict) -> dict:
    """Convert a crawler field dict into the connector's English-keyed row.

    Args:
        f: Field dict keyed by the crawler's Chinese column names.

    Returns:
        A dict with English keys. Missing source fields become ``""``;
        ``photo_urls`` is the ``门店图片`` value split on ``" | "`` with empty
        pieces dropped; ``source`` is always ``"amap"``.
    """
    # (output key, crawler column) pairs, in output order.
    column_map = (
        ("gaode_poi_id", "高德POI_ID"),
        ("name", "名称"),
        ("type", "类型"),
        ("typecode", "类型编码"),
        ("lng", "经度"),
        ("lat", "纬度"),
        ("address", "详细地址"),
        ("province", "省份"),
        ("city", "城市"),
        ("district", "区县"),
        ("business_area", "商圈"),
        ("tel", "联系电话"),
        ("open_time", "营业时间"),
        ("rating", "评分"),
        ("cost", "人均消费"),
        ("level", "等级"),
        ("tags", "标签"),
    )
    joined_photos = f.get("门店图片") or ""
    photo_urls = list(filter(None, joined_photos.split(" | ")))

    row = {english: f.get(chinese, "") for english, chinese in column_map}
    row["photo_urls"] = photo_urls  # URLs only; nothing is downloaded
    row["source"] = "amap"
    row["keyword"] = f.get("采集关键词", "")
    return row
|
||||
|
||||
|
||||
def search_pois(
|
||||
poi_type: str | None = None,
|
||||
keyword: str | None = None,
|
||||
max_pages: int = 2,
|
||||
limit: int = 60,
|
||||
) -> list[dict]:
|
||||
"""Fetch + normalize POIs from 高德. No file/image side effects.
|
||||
|
||||
Only rows with valid lng/lat are returned (KG requires coordinates).
|
||||
"""
|
||||
m = _crawl()
|
||||
if keyword:
|
||||
kws = [keyword]
|
||||
elif poi_type:
|
||||
kws = m.SEARCH_KEYWORDS.get(poi_type, [poi_type])
|
||||
else:
|
||||
kws = []
|
||||
out: list[dict] = []
|
||||
seen: set[str] = set()
|
||||
for kw in kws:
|
||||
for page in range(1, max_pages + 1):
|
||||
res = m.search_poi(kw, page)
|
||||
if not res or res.get("status") != "1":
|
||||
break
|
||||
pois = res.get("pois") or []
|
||||
if not pois:
|
||||
break
|
||||
for p in pois:
|
||||
try:
|
||||
f = m.format_fields(p, len(out) + 1, kw)
|
||||
except Exception:
|
||||
continue
|
||||
pid = f.get("高德POI_ID")
|
||||
if not pid or pid in seen:
|
||||
continue
|
||||
n = _normalize(f)
|
||||
if not (n["lng"] and n["lat"]):
|
||||
continue
|
||||
seen.add(pid)
|
||||
out.append(n)
|
||||
if len(out) >= limit:
|
||||
return out
|
||||
return out
|
||||
|
||||
|
||||
def _amap_key() -> str:
    """Return the Amap API key stored under ``"key"`` in the crawler's CONFIG."""
    crawler_config = _crawl().CONFIG
    return crawler_config["key"]
|
||||
|
||||
|
||||
def search_polygon(
    typecode: str,
    bbox: tuple[float, float, float, float],
    page: int = 1,
    offset: int = 25,
) -> tuple[list[dict], int]:
    """Fetch one page of an Amap rectangle (polygon) search for a type code.

    Intended for scanning a city systematically by geographic grid cell
    instead of relying on popularity-ranked keyword results.

    Args:
        typecode: Amap POI type code sent as the ``types`` parameter.
        bbox: ``(min_lng, min_lat, max_lng, max_lat)``.
        page: Page number sent to the API.
        offset: Page size sent to the API.

    Returns:
        ``(rows, raw_count)``: the normalized rows that have coordinates, and
        the number of POIs Amap returned on this page before filtering. A
        ``raw_count`` below ``offset`` is treated as "this cell is exhausted
        for this type". Any request or payload failure yields ``([], 0)``.
    """
    min_lng, min_lat, max_lng, max_lat = bbox
    # Rectangle expressed as a polygon: top-left | bottom-right.
    polygon = f"{min_lng:.6f},{max_lat:.6f}|{max_lng:.6f},{min_lat:.6f}"
    params = {
        "key": _amap_key(),
        "polygon": polygon,
        "types": typecode,
        "offset": offset,
        "page": page,
        "extensions": "all",
        "output": "json",
    }
    headers = {"User-Agent": "Mozilla/5.0"}

    payload = None
    for _attempt in range(3):  # up to three tries, no delay between them
        try:
            # NOTE(review): verify=False disables TLS certificate checking
            # while the API key travels in the query string -- confirm this
            # is really required in the deployment environment.
            resp = requests.get(
                "https://restapi.amap.com/v3/place/polygon",
                params=params, headers=headers, timeout=30, verify=False)
            payload = resp.json()
        except Exception:
            # Network error or non-JSON body: retry, best-effort.
            continue
        break

    # All attempts failed, the body was not a JSON object, or Amap reported
    # an error status: report an empty page instead of raising.
    if not isinstance(payload, dict) or payload.get("status") != "1":
        return [], 0

    pois = payload.get("pois") or []
    crawler = _crawl()
    rows: list[dict] = []
    for index, poi in enumerate(pois, start=1):
        try:
            fields = crawler.format_fields(poi, index, "grid")
        except Exception:
            # Deliberate best-effort: skip a POI the crawler cannot format.
            continue
        row = _normalize(fields)
        if row["lng"] and row["lat"]:
            rows.append(row)
    return rows, len(pois)
|
||||
Reference in New Issue
Block a user