Initial travel knowledge graph release

2026-06-09 09:56:26 +08:00
commit 5f061295d8
402 changed files with 103877 additions and 0 deletions
--- a/app/agents/super_orchestrator.py
+++ b/app/agents/super_orchestrator.py
@@ -0,0 +1,856 @@
+"""Super Agent — 知识图谱馆长（全城网格自治采集，常驻不停）。
+
+科学合理 & 省额度的核心：**地理网格扫描**，不再用"热度关键词"。
+  • 把贵阳整个城市切成网格，逐格用高德"多边形(矩形)搜索 + 官方类型编码"
+    系统性扫，每格每页都是不同地理切片 → 几乎不重复 → 不浪费 API 额度。
+  • 每格扫到第几页**持久化**（gaode_grid_cells.next_page），跨步/跨轮接着扫，
+    扫尽的格标记 exhausted **永不重复请求**（省额度的关键）。
+  • 稠密区一格出不完 → **自适应四叉细分**，保证不漏。
+  • 全城扫完不"结束"，转**驻守巡检**（不再消耗额度，仅响应停止/再播种）。
+  • 进度=网格地理覆盖率，真实可反映，非拍脑袋数字。
+"""
+from __future__ import annotations
+
+import asyncio
+import re
+
+from app.api.graph import _get_graph
+from app.config import settings
+from app.agents.gaode_connector import AMAP_TYPECODES, search_polygon
+from app.agents.super_ingest import ingest_rows
+from app.agents.distill_gate import distill_entity, ATTR_FIELDS
+from app.agents.web_agent import web_enrich
+from app.agents.xhs_agent import xhs_enrich
+from app.agents.douyin_agent import douyin_enrich
+from app.agents.event_miner import mine_events
+from app.db import (
+    sa_append_step, sa_finish, sa_stop_requested, sa_add_task, sa_set_status,
+    sa_has_open_escalation, create_acquisition_task, create_notification,
+    get_admin_user_id, grid_seed, grid_counts, grid_pending_cats,
+    grid_take_next, grid_update, grid_subdivide,
+    sa_merge_candidate_payload, sa_record_conflict, sa_record_schema_proposal,
+)
+
+# NOTE(review): not referenced in the visible part of this file; presumably
+# keeps references to background tasks alive — confirm against the callers.
+_BG: set = set()
+
+# Bounding box of the whole Guiyang municipality
+# (min_lng, min_lat, max_lng, max_lat): urban core plus surrounding
+# districts and counties.
+_GY_BBOX = (106.20, 26.10, 107.30, 27.05)
+_CELL = 0.08                 # initial grid cell edge in degrees, roughly 8 km
+_OFFSET = 25                 # per-page cap of the Amap polygon search
+_MAX_PAGE_SPLIT = 8          # cell still full at page N -> too dense -> quad split
+_MAX_DEPTH = 3               # deepest subdivision level (0.08 -> roughly 1 km cells)
+_API_PACING = 0.35           # pause between Amap calls (seconds), protects QPS
+_HARD_MAX_STEPS = 100000     # extreme safety net only (state is persisted and resumed across runs, never shown to users)
+_MAX_API_PER_RUN = 6000      # per-process safety cap; when hit, switch to stewarding and resume next round
+_STEWARD_INTERVAL = 1800     # length of one stewarding wait, in seconds
+_STEWARD_TICK = 10           # sleep between stop-request polls while stewarding, in seconds
+_ENRICH_BATCH = 5            # max entities distilled/enriched per step
+_ENRICH_EVERY = 4            # while the grid is busy, insert one enrichment step every N steps
+_WEB_BATCH = 2               # max entities web-enriched per step (a real browser is slow)
+_XHS_BATCH = 1               # the Xiaohongshu browser is slow/fragile: one entity per step
+
+
+def _place_counts() -> dict:
+    """Return the number of Place nodes per ``place_type``.
+
+    Nodes without a ``place_type`` are counted under ``'?'``.  The lookup is
+    best-effort: if the graph query (or reading its rows) raises, whatever
+    was tallied so far is returned — an empty dict when nothing was read.
+    """
+    graph = _get_graph()
+    tally: dict = {}
+    cypher = "MATCH (p:Place) RETURN coalesce(p.place_type,'?'), count(*)"
+    try:
+        records = graph.query(cypher).result_set
+        for record in records:
+            tally[record[0]] = record[1]
+    except Exception:
+        # Deliberately swallowed: a failed lookup just yields fewer counts.
+        pass
+    return tally
+
+
+def _coverage() -> dict:
+    """Report how many Place nodes exist for every Amap category.
+
+    Returns ``{"items": [...], "total": n}`` where each item carries the
+    category, the place_type it maps to (``"poi"`` when unmapped) and the
+    current node count of that place_type; ``total`` sums the item counts.
+    """
+    from app.agents.super_ingest import _PT
+    per_type = _place_counts()
+    items = []
+    total = 0
+    for cat in AMAP_TYPECODES:
+        place_type = _PT.get(cat, "poi")
+        current = int(per_type.get(place_type, 0))
+        items.append({"cat": cat, "place_type": place_type,
+                      "current": current})
+        total += current
+    return {"items": items, "total": total}
+
+
+def _grid_for(bbox: tuple, step: float) -> list[tuple]:
+    """Tile ``bbox`` into square cells of edge ``step`` (degrees).
+
+    Args:
+        bbox: ``(min_lng, min_lat, max_lng, max_lat)``.
+        step: cell edge length; must be positive.
+
+    Returns:
+        ``(min_lng, min_lat, max_lng, max_lat)`` tuples, column by column
+        (longitude outer, latitude inner), coordinates rounded to 6
+        decimals.  Cells on the far edges are clipped to the bbox.
+
+    Raises:
+        ValueError: if ``step`` is not positive (the scan could never
+            advance and would loop forever).
+    """
+    if not step > 0:
+        raise ValueError(f"grid step must be positive, got {step!r}")
+    mnlng, mnlat, mxlng, mxlat = bbox
+    cells, lng = [], mnlng
+    while lng < mxlng:
+        nlng = round(min(lng + step, mxlng), 6)
+        if nlng <= lng:
+            # Rounding to 6 decimals swallowed the remaining sliver; without
+            # this guard the loop would never reach max_lng.
+            break
+        lat = mnlat
+        while lat < mxlat:
+            nlat = round(min(lat + step, mxlat), 6)
+            if nlat <= lat:
+                # Same no-progress guard for the latitude direction.
+                break
+            cells.append((round(lng, 6), round(lat, 6), nlng, nlat))
+            lat = nlat
+        lng = nlng
+    return cells
+
+
+def _quads(c: dict) -> list[tuple]:
+    """Split a parent cell into four child rectangles (halved both ways).
+
+    ``c`` provides ``min_lng``/``min_lat``/``max_lng``/``max_lat``.  The
+    children are returned as ``(min_lng, min_lat, max_lng, max_lat)`` tuples
+    in the order south-west, south-east, north-west, north-east; the
+    midpoints are rounded to 6 decimals.
+    """
+    west, south = c["min_lng"], c["min_lat"]
+    east, north = c["max_lng"], c["max_lat"]
+    mid_lng = round((west + east) / 2, 6)
+    mid_lat = round((south + north) / 2, 6)
+    south_west = (west, south, mid_lng, mid_lat)
+    south_east = (mid_lng, south, east, mid_lat)
+    north_west = (west, mid_lat, mid_lng, north)
+    north_east = (mid_lng, mid_lat, east, north)
+    return [south_west, south_east, north_west, north_east]
+
+
+async def _seed_if_needed() -> None:
+    """Seed the city-wide grid for every Amap category not yet in the grid.
+
+    Categories already present in ``grid_counts()`` are left untouched.
+    """
+    existing = await grid_counts()
+    for cat, typecode in AMAP_TYPECODES.items():
+        if cat in existing:
+            continue
+        cells = _grid_for(_GY_BBOX, _CELL)
+        await grid_seed(cat, typecode, cells)
+
+
+async def _escalate(run_id: int, step: int, cat: str, cur: int) -> None:
+    """Escalate a category that is still sparse after the grid is exhausted.
+
+    If an escalation for ``cat`` is already open, only a ``skip_dup`` task
+    row is recorded.  Otherwise an acquisition task is created and the admin
+    is notified (both best-effort: failures are swallowed), then the
+    escalation is logged as a task row and a step entry.
+    """
+    why = "全城网格已扫尽，该类仍偏少"
+    already_open = await sa_has_open_escalation(cat)
+    if already_open:
+        await sa_add_task(run_id, step, cat, "escalate", why, "skip_dup",
+                          status="escalated", note="已有未结工单，不重复打扰")
+        return
+
+    task_id = None
+    try:
+        payload = {
+            "tenant_id": settings.default_tenant,
+            "project_id": settings.default_project,
+            "created_by": "super_agent",
+            "title": f"【Super Agent 求助】「{cat}」全城网格已扫尽仍偏少",
+            "description": (
+                f"馆长已对贵阳全城做网格化高德采集，「{cat}」当前仅 {cur} 条，"
+                f"高德官方源对该类覆盖有限。建议人工核查类型编码或改用"
+                f"小红书/大众点评/官方名录等渠道补全。馆长继续驻守，"
+                f"工单结清/数据增长后自动恢复。"),
+            "scenario_tags": ["super_agent", "escalation", cat],
+            "target_entity_types": ["Place"],
+            "target_fields": [],
+            "suggested_collection_method": "manual_or_alt_source",
+            "priority": 1,
+        }
+        created = await create_acquisition_task(payload)
+        task_id = created["id"]
+    except Exception:
+        # No ticket could be opened; continue without a ticket reference.
+        task_id = None
+
+    try:
+        admin_id = await get_admin_user_id()
+        if admin_id:
+            ticket = f" #{task_id}" if task_id else ""
+            await create_notification(
+                admin_id,
+                title=f"Super Agent 求助：「{cat}」全城网格已扫尽",
+                body=f"「{cat}」仅 {cur} 条，高德源覆盖有限。已开工单"
+                     + ticket + "，请人工/改渠道；馆长继续驻守。",
+                ntype="task", related_task_id=task_id)
+    except Exception:
+        # The notification is a courtesy; never let it block the escalation.
+        pass
+
+    await sa_add_task(run_id, step, cat, "escalate", why, "notify_admin",
+                      status="escalated", related_task_id=task_id)
+    entry = {
+        "step": step, "action": "escalate", "cat": cat,
+        "reason": f"「{cat}」全城网格扫尽仅{cur}条，已开工单并通知管理员，继续驻守"}
+    await sa_append_step(run_id, entry)
+
+
+def _enrich_targets(limit: int) -> list[dict]:
+    """Fetch up to ``limit`` Place nodes that have no ``enrich_done`` flag.
+
+    Each target carries the anchors (name, place_type, district, address)
+    and an ``existing`` dict with the non-empty soft fields already on the
+    node.  The query returns neither coordinates nor phone numbers, so none
+    can reach the distillation models (privacy red line).  Returns ``[]``
+    when the graph query fails.
+    """
+    graph = _get_graph()
+    cypher = (
+        "MATCH (p:Place) WHERE p.enrich_done IS NULL "
+        "RETURN p.element_id, p.name, coalesce(p.place_type,''), "
+        "coalesce(p.district,''), coalesce(p.address,''), "
+        "coalesce(p.summary,''), coalesce(p.history,''), "
+        "coalesce(p.features,''), coalesce(p.suitable_for,''), "
+        "coalesce(p.best_season,''), coalesce(p.ticket_hint,'') "
+        "LIMIT $n")
+    try:
+        rows = graph.query(cypher, {"n": limit}).result_set
+    except Exception:
+        return []
+
+    targets = []
+    for row in rows:
+        # Rows without a name cannot be anchored; skip them.
+        if not row or not row[1]:
+            continue
+        soft_values = [row[i] for i in range(5, 11)]
+        existing = {}
+        for field, value in zip(ATTR_FIELDS, soft_values):
+            if value:
+                existing[field] = value
+        targets.append({"eid": row[0], "name": row[1], "place_type": row[2],
+                        "district": row[3], "address": row[4],
+                        "existing": existing})
+    return targets
+
+
+def _apply_enrich(eid: str, fields: dict) -> None:
+    """Write the agreed fields back to the Place node in the graph.
+
+    ``enrich_done`` is always set, even when ``fields`` contributes nothing,
+    so the same entity is not picked up again.  Only truthy values for keys
+    in ``ATTR_FIELDS`` are written.  Query errors are swallowed.
+    """
+    graph = _get_graph()
+    chosen = [name for name in ATTR_FIELDS if fields.get(name)]
+    assignments = ["p.enrich_done=1"] + [f"p.{name}=${name}"
+                                         for name in chosen]
+    params = {"eid": eid}
+    params.update((name, fields[name]) for name in chosen)
+    cypher = ("MATCH (p:Place {element_id:$eid}) SET "
+              + ",".join(assignments))
+    try:
+        graph.query(cypher, params)
+    except Exception:
+        # Best-effort write-back.
+        pass
+
+
+async def _distill_enrich(run_id: int, step: int,
+                          targets: list[dict]) -> bool:
+    """Tool: multi-model knowledge distillation for existing entities.
+
+    For every target ``distill_entity`` is awaited; adopted fields are
+    written to the graph node and merged into the candidate payload, while
+    conflicts with existing graph values are recorded for human review
+    instead of being overwritten.
+
+    Returns ``False`` as soon as one result is not ``ok`` (the batch is
+    aborted and that target gets no done-flag), otherwise ``True`` after the
+    batch summary has been logged and, if conflicts exist, the admin has
+    been notified (best-effort).
+    """
+    n_entities = n_fields = n_keep = n_conflicts = n_uncertain = 0
+    flagged: list[str] = []
+    last = ""
+    for target in targets:
+        outcome = await distill_entity(target)
+        last = outcome.get("summary", "")
+        if not outcome.get("ok"):
+            # Configuration/connectivity problem: abort the whole batch and
+            # leave the target unflagged so it is retried once fixed.
+            nothing = {"fetched": 0, "approved": 0, "pending": 0,
+                       "skipped": 0}
+            await sa_add_task(
+                run_id, step, "蒸馏", "distill",
+                f"知识蒸馏未就绪：{last}", "distill_enrich",
+                result=nothing, status="skipped", note=last)
+            await sa_append_step(run_id, {
+                "step": step, "action": "distill",
+                "reason": f"知识蒸馏未配置/不可用（{last}），本步跳过，"
+                          f"配好≥2蒸馏模型+全局聚合后自动恢复"})
+            return False
+
+        adopt = outcome.get("adopt") or {}
+        eid = target["eid"]
+        # Only adopted fields are written; with none, just the flag is set.
+        _apply_enrich(eid, adopt)
+        if adopt:
+            await sa_merge_candidate_payload(eid, adopt)
+            n_entities += 1
+            n_fields += len(adopt)
+        n_keep += len(outcome.get("keep") or [])
+        n_uncertain += len(outcome.get("uncertain") or [])
+
+        # Contradicts an existing graph value: do not overwrite, record it
+        # for manual review.
+        for clash in (outcome.get("conflict") or []):
+            await sa_record_conflict(
+                eid, clash.get("field", ""), clash.get("existing", ""),
+                clash.get("distilled", ""), clash.get("note", ""))
+            n_conflicts += 1
+            if target.get("name"):
+                flagged.append(f"{target['name']}·{clash.get('field','')}")
+
+    summary = {"fetched": len(targets), "approved": n_fields,
+               "pending": n_conflicts, "skipped": n_uncertain}
+    await sa_add_task(
+        run_id, step, "蒸馏", "distill",
+        f"多模型知识蒸馏富集 {len(targets)} 个实体（{last}）",
+        "distill_enrich", result=summary, status="done",
+        note=f"富集{n_entities}实体/{n_fields}字段·一致{n_keep}"
+             f"·矛盾{n_conflicts}·存疑{n_uncertain}")
+    await sa_append_step(run_id, {
+        "step": step, "action": "distill",
+        "reason": f"知识蒸馏富集：{n_entities}/{len(targets)} 实体补 {n_fields} 字段，"
+                  f"矛盾 {n_conflicts} 转人工（{last}）"})
+
+    if not n_conflicts:
+        return True
+    try:
+        admin_id = await get_admin_user_id()
+        if admin_id:
+            shown = "、".join(flagged[:8])
+            more = "…" if len(flagged) > 8 else ""
+            await create_notification(
+                admin_id,
+                title=f"蒸馏发现 {n_conflicts} 处与图谱既有数据矛盾",
+                body="需人工裁决（蒸馏未覆盖图谱）：" + shown + more
+                     + "。详见 数据质量 / 校验问题(distill_conflict)。",
+                ntype="task")
+    except Exception:
+        # Notification is best-effort.
+        pass
+    return True
+
+
+def _web_targets(limit: int) -> list[dict]:
+    """Fetch up to ``limit`` Place nodes that have no ``web_done`` flag.
+
+    Places with ``place_type='sight'`` come first (then by element_id).
+    Each target carries the anchors and the non-empty soft fields already
+    present.  The query returns neither coordinates nor phone numbers
+    (privacy red line).  Returns ``[]`` when the graph query fails.
+    """
+    graph = _get_graph()
+    # Sights are the most likely to have a public encyclopedia page, so they
+    # are web-enriched first for the best hit rate.
+    cypher = (
+        "MATCH (p:Place) WHERE p.web_done IS NULL "
+        "RETURN p.element_id, p.name, coalesce(p.place_type,''), "
+        "coalesce(p.district,''), coalesce(p.address,''), "
+        "coalesce(p.summary,''), coalesce(p.history,''), "
+        "coalesce(p.features,''), coalesce(p.suitable_for,''), "
+        "coalesce(p.best_season,''), coalesce(p.ticket_hint,'') "
+        "ORDER BY CASE WHEN p.place_type='sight' THEN 0 ELSE 1 END, "
+        "p.element_id LIMIT $n")
+    try:
+        rows = graph.query(cypher, {"n": limit}).result_set
+    except Exception:
+        return []
+
+    targets = []
+    for row in rows:
+        if not row or not row[1]:
+            continue
+        soft_values = [row[i] for i in range(5, 11)]
+        existing = {}
+        for field, value in zip(ATTR_FIELDS, soft_values):
+            if value:
+                existing[field] = value
+        targets.append({"eid": row[0], "name": row[1], "place_type": row[2],
+                        "district": row[3], "address": row[4],
+                        "existing": existing})
+    return targets
+
+
+def _apply_web(eid: str, adopt: dict) -> None:
+    """Write web-adopted fields back to the Place node.
+
+    ``web_done`` is always set.  When ``adopt`` is non-empty, ``enrich_done``
+    is set as well, so the memory-distillation pass skips this entity.
+    Only truthy values for keys in ``ATTR_FIELDS`` are written.  Query
+    errors are swallowed.
+    """
+    graph = _get_graph()
+    assignments = ["p.web_done=1"]
+    if adopt:
+        assignments.append("p.enrich_done=1")
+    params = {"eid": eid}
+    for field in ATTR_FIELDS:
+        value = adopt.get(field)
+        if not value:
+            continue
+        assignments.append(f"p.{field}=${field}")
+        params[field] = value
+    cypher = ("MATCH (p:Place {element_id:$eid}) SET "
+              + ",".join(assignments))
+    try:
+        graph.query(cypher, params)
+    except Exception:
+        # Best-effort write-back.
+        pass
+
+
+async def _web_enrich(run_id: int, step: int, targets: list[dict]) -> bool:
+    """Tool: browser-based web collection for a batch of Place targets.
+
+    For every target ``web_enrich`` is awaited.  Pages that are not found or
+    do not match the entity only get the ``web_done`` flag.  Otherwise the
+    adopted fields are written back and merged into the candidate payload,
+    conflicts are recorded for manual review, and schema gaps are filed as
+    schema proposals.
+
+    Returns ``False`` as soon as one result is not ``ok`` (the source is not
+    ready; the batch is aborted), otherwise ``True`` after the batch summary
+    has been logged and, if anything needs a decision, the admin has been
+    notified (best-effort).
+    """
+    adopt_fields = found = conflict_total = gap_total = 0
+    last = ""
+    for t in targets:
+        r = await web_enrich(t)
+        last = r.get("summary", "")
+        if not r.get("ok"):
+            await sa_add_task(run_id, step, "联网", "web",
+                              f"web_agent 未就绪：{last}", "web_agent",
+                              result={"fetched": 0, "approved": 0,
+                                      "pending": 0, "skipped": 0},
+                              status="skipped", note=last)
+            await sa_append_step(run_id, {
+                "step": step, "action": "web",
+                "reason": f"web_agent 未配置/不可用（{last}），跳过待恢复"})
+            return False
+        if not r.get("found") or r.get("entity_match") is False:
+            # Flag web_done so the page is not fetched again.
+            _apply_web(t["eid"], {})
+            continue
+        found += 1
+        adopt = r.get("adopt") or {}
+        _apply_web(t["eid"], adopt)
+        if adopt:
+            await sa_merge_candidate_payload(t["eid"], adopt)
+            adopt_fields += len(adopt)
+        for c in (r.get("conflict") or []):
+            await sa_record_conflict(t["eid"], c.get("field", ""),
+                                     c.get("existing", ""),
+                                     c.get("web", ""), c.get("note", ""))
+            conflict_total += 1
+        for sgap in (r.get("schema_gaps") or []):
+            # Without an explicit field name, derive one from the attribute
+            # label by collapsing non-word runs into underscores.
+            iid = await sa_record_schema_proposal(
+                sgap.get("attr", ""), sgap.get("field", "")
+                or re.sub(r"\W+", "_", sgap.get("attr", "")).strip("_"),
+                str(sgap.get("value", ""))[:200],
+                f"web_agent 在「{t['name']}」网页发现：{sgap.get('why','')}",
+                float(r.get("confidence") or 0.7))
+            if iid:
+                gap_total += 1
+
+    res_obj = {"fetched": len(targets), "approved": adopt_fields,
+               "pending": conflict_total, "skipped": gap_total}
+    await sa_add_task(
+        run_id, step, "联网", "web",
+        f"browser-use 联网采集 {len(targets)} 个实体（{last}）",
+        "web_agent", result=res_obj, status="done",
+        note=f"命中{found}·补{adopt_fields}字段·矛盾{conflict_total}"
+             f"·schema提案{gap_total}")
+    await sa_append_step(run_id, {
+        "step": step, "action": "web",
+        "reason": f"联网采集：命中 {found}/{len(targets)}，补 {adopt_fields} 字段，"
+                  f"矛盾 {conflict_total} 转人工，schema 提案 {gap_total}（{last}）"})
+    if conflict_total or gap_total:
+        try:
+            uid = await get_admin_user_id()
+            if uid:
+                await create_notification(
+                    uid,
+                    title=f"web_agent：{conflict_total} 矛盾 / {gap_total} schema 提案待裁决",
+                    body="联网采集与图谱矛盾或发现新属性，详见 数据质量/校验问题 "
+                         "与 本体建模/字段提案。",
+                    ntype="task")
+        except Exception:
+            # Notification is best-effort.
+            pass
+    return True
+
+
+def _xhs_targets(limit: int) -> list[dict]:
+    """Fetch up to ``limit`` Place nodes that have no ``xhs_done`` flag.
+
+    Ordering: ``eat`` places first, then ``sight``, then everything else,
+    ties broken by element_id.  Rows without a name are dropped.  Returns
+    ``[]`` when the graph query fails.
+    """
+    graph = _get_graph()
+    cypher = (
+        "MATCH (p:Place) WHERE p.xhs_done IS NULL "
+        "RETURN p.element_id, p.name, coalesce(p.place_type,''), "
+        "coalesce(p.district,'') "
+        "ORDER BY CASE WHEN p.place_type='eat' THEN 0 "
+        "WHEN p.place_type='sight' THEN 1 ELSE 2 END, p.element_id "
+        "LIMIT $n")
+    try:
+        rows = graph.query(cypher, {"n": limit}).result_set
+    except Exception:
+        return []
+    picked = []
+    for row in rows:
+        if row and row[1]:
+            picked.append({"eid": row[0], "name": row[1],
+                           "place_type": row[2], "district": row[3]})
+    return picked
+
+
+def _apply_xhs(eid: str, tags: list[str]) -> None:
+    """Write Xiaohongshu experience tags back to the graph.
+
+    The Place is flagged ``xhs_done`` whether or not there are tags, then
+    each tag is merged as an ``ExperienceTag`` linked by ``HAS_TAG`` with
+    ``source`` set to ``'xiaohongshu'``.  Any query error is swallowed and
+    stops the remaining writes.
+    """
+    graph = _get_graph()
+    mark_done = "MATCH (p:Place {element_id:$eid}) SET p.xhs_done=1"
+    link_tag = (
+        "MATCH (p:Place {element_id:$eid}) "
+        "MERGE (e:ExperienceTag {name:$t}) "
+        "MERGE (p)-[:HAS_TAG]->(e) SET e.source='xiaohongshu'")
+    try:
+        graph.query(mark_done, {"eid": eid})
+        for tag in tags:
+            graph.query(link_tag, {"eid": eid, "t": tag})
+    except Exception:
+        # Best-effort write-back.
+        pass
+
+
+async def _xhs_enrich(run_id: int, step: int, targets: list[dict]) -> bool:
+    """Tool: turn Xiaohongshu UGC into experience tags for a batch of Places.
+
+    Returns ``False`` (aborting the batch) when a result is not ``ok`` or
+    when the agent reports ``need_login``.  In the login case a one-off
+    manual-login ticket and an admin notification are created, unless such
+    an escalation is already open.  Otherwise every target is flagged
+    ``xhs_done``, tags are written back, the batch summary is logged and
+    ``True`` is returned.
+    """
+    tags_total = found = ev_total = 0
+    last = ""
+    for t in targets:
+        r = await xhs_enrich(t)
+        last = r.get("summary", "")
+        if not r.get("ok"):
+            await sa_add_task(run_id, step, "小红书", "xhs",
+                              f"xhs_agent 未就绪：{last}", "xhs_agent",
+                              result={"fetched": 0, "approved": 0,
+                                      "pending": 0, "skipped": 0},
+                              status="skipped", note=last)
+            await sa_append_step(run_id, {"step": step, "action": "xhs",
+                                          "reason": f"小红书未配置/停用（{last}）"})
+            return False
+        if r.get("need_login"):
+            if not await sa_has_open_escalation("小红书登录"):
+                try:
+                    task = await create_acquisition_task({
+                        "tenant_id": settings.default_tenant,
+                        "project_id": settings.default_project,
+                        "created_by": "super_agent",
+                        "title": "【Super Agent 求助】小红书需一次性人工登录",
+                        "description": "后台 xhs_agent 检测到未登录。请在项目根目录运行 "
+                                       "`python3 scripts/xhs_login.py`，弹出浏览器里登录"
+                                       "小红书一次；cookie 持久化后馆长自动恢复采集。",
+                        "scenario_tags": ["super_agent", "escalation", "小红书登录"],
+                        "target_entity_types": ["Place"], "target_fields": [],
+                        "suggested_collection_method": "manual_login",
+                        "priority": 1})
+                    uid = await get_admin_user_id()
+                    if uid:
+                        await create_notification(
+                            uid, title="小红书需一次性登录",
+                            body="运行 scripts/xhs_login.py 登录一次即可，馆长自动恢复。",
+                            ntype="task", related_task_id=task["id"])
+                except Exception:
+                    # Ticket/notification are best-effort; the escalation is
+                    # still logged below.
+                    pass
+            await sa_add_task(run_id, step, "小红书", "xhs",
+                              "小红书未登录，已升级人工一次性登录", "notify_admin",
+                              result={"fetched": 0, "approved": 0,
+                                      "pending": 0, "skipped": 0},
+                              status="escalated", note="待 scripts/xhs_login.py")
+            await sa_append_step(run_id, {
+                "step": step, "action": "xhs",
+                "reason": "小红书未登录 → 已开工单+通知(运行 xhs_login.py)，本轮暂停小红书"})
+            return False
+        ev_total += int(r.get("evidence_saved") or 0)
+        if r.get("found"):
+            _apply_xhs(t["eid"], r.get("tags") or [])
+            found += 1
+            if r.get("tags"):
+                tags_total += len(r["tags"])
+        else:
+            # Flag the entity anyway so it is not collected again.
+            _apply_xhs(t["eid"], [])
+    res_obj = {"fetched": len(targets), "approved": tags_total,
+               "pending": ev_total, "skipped": len(targets) - found}
+    await sa_add_task(run_id, step, "小红书", "xhs",
+                      f"小红书 UGC 采集 {len(targets)} 个实体（{last}）",
+                      "xhs_agent", result=res_obj, status="done",
+                      note=f"命中{found}·证据{ev_total}条·体验标签{tags_total}")
+    await sa_append_step(run_id, {
+        "step": step, "action": "xhs",
+        "reason": f"小红书采集：命中 {found}/{len(targets)}，证据入库 {ev_total} 条，"
+                  f"产出 {tags_total} 个体验标签（{last}）"})
+    return True
+
+
+def _dy_targets(limit: int) -> list[dict]:
+    """Fetch up to ``limit`` Place nodes that have no ``dy_done`` flag.
+
+    Ordering: ``eat`` places first, then ``sight``, then everything else,
+    ties broken by element_id.  Rows without a name are dropped.  Returns
+    ``[]`` when the graph query fails.
+    """
+    graph = _get_graph()
+    cypher = (
+        "MATCH (p:Place) WHERE p.dy_done IS NULL "
+        "RETURN p.element_id, p.name, coalesce(p.place_type,''), "
+        "coalesce(p.district,'') "
+        "ORDER BY CASE WHEN p.place_type='eat' THEN 0 "
+        "WHEN p.place_type='sight' THEN 1 ELSE 2 END, p.element_id "
+        "LIMIT $n")
+    try:
+        rows = graph.query(cypher, {"n": limit}).result_set
+    except Exception:
+        return []
+    picked = []
+    for row in rows:
+        if row and row[1]:
+            picked.append({"eid": row[0], "name": row[1],
+                           "place_type": row[2], "district": row[3]})
+    return picked
+
+
+def _apply_dy(eid: str, tags: list[str]) -> None:
+    """Write Douyin experience tags back to the graph.
+
+    The Place is flagged ``dy_done`` whether or not there are tags, then
+    each tag is merged as an ``ExperienceTag`` linked by ``HAS_TAG`` with
+    ``source`` set to ``'douyin'``.  Any query error is swallowed and stops
+    the remaining writes.
+    """
+    graph = _get_graph()
+    mark_done = "MATCH (p:Place {element_id:$eid}) SET p.dy_done=1"
+    link_tag = (
+        "MATCH (p:Place {element_id:$eid}) "
+        "MERGE (e:ExperienceTag {name:$t}) "
+        "MERGE (p)-[:HAS_TAG]->(e) SET e.source='douyin'")
+    try:
+        graph.query(mark_done, {"eid": eid})
+        for tag in tags:
+            graph.query(link_tag, {"eid": eid, "t": tag})
+    except Exception:
+        # Best-effort write-back.
+        pass
+
+
+async def _douyin_enrich(run_id: int, step: int,
+                         targets: list[dict]) -> bool:
+    """Tool: turn Douyin UGC into evidence and experience tags.
+
+    Returns ``False`` (aborting the batch) when a result is not ``ok`` or
+    when the agent reports ``need_login``.  In the login case a one-off
+    manual-login ticket and an admin notification are created, unless such
+    an escalation is already open.  Otherwise every target is flagged
+    ``dy_done``, tags are written back, the batch summary is logged and
+    ``True`` is returned.
+    """
+    tags_total = found = ev_total = 0
+    last = ""
+    for t in targets:
+        r = await douyin_enrich(t)
+        last = r.get("summary", "")
+        if not r.get("ok"):
+            await sa_add_task(run_id, step, "抖音", "douyin",
+                              f"douyin_agent 未就绪：{last}", "douyin_agent",
+                              result={"fetched": 0, "approved": 0,
+                                      "pending": 0, "skipped": 0},
+                              status="skipped", note=last)
+            await sa_append_step(run_id, {"step": step, "action": "douyin",
+                                          "reason": f"抖音未配置/停用（{last}）"})
+            return False
+        if r.get("need_login"):
+            if not await sa_has_open_escalation("抖音登录"):
+                try:
+                    task = await create_acquisition_task({
+                        "tenant_id": settings.default_tenant,
+                        "project_id": settings.default_project,
+                        "created_by": "super_agent",
+                        "title": "【Super Agent 求助】抖音需一次性人工登录",
+                        "description": "后台 douyin_agent 检测到未登录。请在项目根目录"
+                                       "运行 `python3 scripts/douyin_login.py`，弹出"
+                                       "浏览器里登录抖音一次；cookie 持久化后自动恢复。",
+                        "scenario_tags": ["super_agent", "escalation",
+                                          "抖音登录"],
+                        "target_entity_types": ["Place"], "target_fields": [],
+                        "suggested_collection_method": "manual_login",
+                        "priority": 1})
+                    uid = await get_admin_user_id()
+                    if uid:
+                        await create_notification(
+                            uid, title="抖音需一次性登录",
+                            body="运行 scripts/douyin_login.py 登录一次即可，"
+                                 "馆长自动恢复。",
+                            ntype="task", related_task_id=task["id"])
+                except Exception:
+                    # Ticket/notification are best-effort; the escalation is
+                    # still logged below.
+                    pass
+            await sa_add_task(run_id, step, "抖音", "douyin",
+                              "抖音未登录，已升级人工一次性登录", "notify_admin",
+                              result={"fetched": 0, "approved": 0,
+                                      "pending": 0, "skipped": 0},
+                              status="escalated",
+                              note="待 scripts/douyin_login.py")
+            await sa_append_step(run_id, {
+                "step": step, "action": "douyin",
+                "reason": "抖音未登录 → 已开工单+通知(运行 douyin_login.py)，"
+                          "本轮暂停抖音"})
+            return False
+        ev_total += int(r.get("evidence_saved") or 0)
+        if r.get("found"):
+            _apply_dy(t["eid"], r.get("tags") or [])
+            found += 1
+            if r.get("tags"):
+                tags_total += len(r["tags"])
+        else:
+            # Flag the entity anyway so it is not collected again.
+            _apply_dy(t["eid"], [])
+    res_obj = {"fetched": len(targets), "approved": tags_total,
+               "pending": ev_total, "skipped": len(targets) - found}
+    await sa_add_task(run_id, step, "抖音", "douyin",
+                      f"抖音 UGC 采集 {len(targets)} 个实体（{last}）",
+                      "douyin_agent", result=res_obj, status="done",
+                      note=f"命中{found}·证据{ev_total}条·体验标签{tags_total}")
+    await sa_append_step(run_id, {
+        "step": step, "action": "douyin",
+        "reason": f"抖音采集：命中 {found}/{len(targets)}，证据入库 {ev_total} 条，"
+                  f"产出 {tags_total} 个体验标签（{last}）"})
+    return True
+
+
+def _events_targets(limit: int) -> list[dict]:
+    """Fetch up to ``limit`` Places ready for event mining.
+
+    A Place qualifies when at least one of ``web_done``/``xhs_done``/
+    ``dy_done`` equals 1 and ``events_done`` is not set, so the selection
+    does not depend on any single platform.  Rows without a name are
+    dropped.  Returns ``[]`` when the graph query fails.
+    """
+    graph = _get_graph()
+    cypher = (
+        "MATCH (p:Place) "
+        "WHERE (p.web_done=1 OR p.xhs_done=1 OR p.dy_done=1) "
+        "  AND p.events_done IS NULL "
+        "RETURN p.element_id, p.name LIMIT $n")
+    try:
+        rows = graph.query(cypher, {"n": limit}).result_set
+    except Exception:
+        return []
+    picked = []
+    for row in rows:
+        if row and row[1]:
+            picked.append({"eid": row[0], "name": row[1]})
+    return picked
+
+
+def _apply_events(eid: str, events: list[dict]) -> None:
+    """Write mined events back to the graph.
+
+    The Place is flagged ``events_done`` first, whether or not there are
+    events.  Each event is then merged as ``Event {place, title}`` (so a
+    repeated title for the same place does not create a second node) and
+    linked by ``HAS_EVENT {time, type}``.  ``source`` falls back to
+    ``'mixed'`` and ``time_norm`` falls back to ``time`` when absent.
+
+    Each event is written independently.  The Place is never revisited once
+    flagged, so one malformed event must not discard the events after it.
+    If the flag itself cannot be written, nothing else is attempted.
+    """
+    g = _get_graph()
+    try:
+        g.query("MATCH (p:Place {element_id:$eid}) SET p.events_done=1",
+                {"eid": eid})
+    except Exception:
+        return
+    for e in events:
+        try:
+            people = e.get("participants") or []
+            if isinstance(people, str):
+                # A bare string would otherwise be joined character by
+                # character ("abc" -> "a,b,c").
+                people = [people]
+            g.query(
+                "MATCH (p:Place {element_id:$eid}) "
+                "MERGE (ev:Event {place:$eid, title:$t}) "
+                "SET ev.time=$tm, ev.time_norm=$tn, ev.type=$ty, "
+                "    ev.desc=$d, ev.source=$src, ev.participants=$ppl, "
+                "    ev.confidence=$conf "
+                "MERGE (p)-[:HAS_EVENT {time:$tn, type:$ty}]->(ev)",
+                {"eid": eid, "t": e.get("title", ""),
+                 "tm": e.get("time", ""),
+                 "tn": e.get("time_norm", "") or e.get("time", ""),
+                 "ty": e.get("type", ""), "d": e.get("desc", ""),
+                 "src": e.get("source_platform", "") or "mixed",
+                 "ppl": ",".join(str(p) for p in people),
+                 "conf": float(e.get("confidence") or 0)})
+        except Exception:
+            # Best-effort per event: skip the bad one, keep the rest.
+            continue
+
+
+async def _event_mine(run_id: int, step: int, targets: list[dict]) -> bool:
+    """Tool: extract time-anchored events for a batch of Places.
+
+    ``mine_events`` is awaited per target and its events are written back
+    (the Place is flagged even when there are none).  Returns ``False`` as
+    soon as one result is not ``ok`` (the batch is aborted), otherwise
+    ``True`` after the batch summary has been logged.
+    """
+    with_events = n_events = 0
+    last = ""
+    for target in targets:
+        outcome = await mine_events(target)
+        last = outcome.get("summary", "")
+        if not outcome.get("ok"):
+            nothing = {"fetched": 0, "approved": 0,
+                       "pending": 0, "skipped": 0}
+            await sa_add_task(run_id, step, "事件", "event",
+                              f"event_miner 未就绪：{last}", "event_miner",
+                              result=nothing, status="skipped", note=last)
+            await sa_append_step(run_id, {"step": step, "action": "event",
+                                          "reason": f"事件抽取未配置（{last}）"})
+            return False
+        mined = outcome.get("events") or []
+        _apply_events(target["eid"], mined)
+        if mined:
+            with_events += 1
+            n_events += len(mined)
+
+    summary = {"fetched": len(targets), "approved": n_events,
+               "pending": 0, "skipped": len(targets) - with_events}
+    await sa_add_task(run_id, step, "事件", "event",
+                      f"评论时间→事件抽取 {len(targets)} 个实体（{last}）",
+                      "event_miner", result=summary, status="done",
+                      note=f"命中{with_events}·事件{n_events}")
+    await sa_append_step(run_id, {
+        "step": step, "action": "event",
+        "reason": f"事件抽取：{with_events}/{len(targets)} 实体，"
+                  f"产出 {n_events} 个时间锚定事件（{last}）"})
+    return True
+
+
+async def run_super_agent(run_id: int) -> None:
+    await _seed_if_needed()
+    step = 0
+    api_calls = 0
+    enrich_off = False           # 记忆蒸馏未配置/不可用时本轮关闭
+    web_off = False              # web_agent 未就绪时本轮关闭，避免空转
+    xhs_off = False              # 小红书未配置/未登录时本轮关闭
+    dy_off = False               # 抖音未配置/未登录时本轮关闭
+    event_off = False            # 事件抽取未配置时本轮关闭
+    enrich_turn = 0              # 富集源轮转(web/xhs/douyin/event/distill 公平)
+    escalated: set[str] = set()
+
+    async def _wait_steward() -> bool:
+        waited = 0
+        while waited < _STEWARD_INTERVAL:
+            if await sa_stop_requested(run_id):
+                return True
+            await asyncio.sleep(_STEWARD_TICK)
+            waited += _STEWARD_TICK
+        return False
+
+    try:
+        while True:
+            if await sa_stop_requested(run_id):
+                await sa_finish(run_id, "stopped")
+                return
+
+            # ── 指挥大脑选工具 ───────────────────────────────────────
+            #  高德网格法 = 主力快采（最快、结构化、带坐标）；
+            #  多模型蒸馏 = 知识富集（补高德给不了的知识层，独立来源）。
+            #  网格忙时每 _ENRICH_EVERY 步插一次富集；网格扫完后全力富集。
+            pend = await grid_pending_cats()
+            grid_busy = (bool(pend) and api_calls < _MAX_API_PER_RUN
+                         and step < _HARD_MAX_STEPS)
+
+            #  富集插槽：web/小红书/抖音/事件/记忆蒸馏 **轮转调度**
+            #  (避免某源目标海量把其它源饿死，多源公平持续推进)
+            if ((not web_off or not xhs_off or not dy_off or not event_off
+                 or not enrich_off)
+                    and ((not grid_busy)
+                         or (step > 0 and step % _ENRICH_EVERY == 0))):
+                pool = ([("web", _web_targets, _WEB_BATCH, _web_enrich)]
+                        if not web_off else [])
+                pool += ([("xhs", _xhs_targets, _XHS_BATCH, _xhs_enrich)]
+                         if not xhs_off else [])
+                pool += ([("douyin", _dy_targets, 1, _douyin_enrich)]
+                         if not dy_off else [])
+                pool += ([("event", _events_targets, 3, _event_mine)]
+                         if not event_off else [])
+                pool += ([("distill", _enrich_targets, _ENRICH_BATCH,
+                           _distill_enrich)] if not enrich_off else [])
+                hit = False
+                for i in range(len(pool)):
+                    name, tfn, batch, efn = pool[(enrich_turn + i) % len(pool)]
+                    tg = await asyncio.to_thread(tfn, batch)
+                    if not tg:
+                        continue
+                    enrich_turn += 1
+                    step += 1
+                    ok = await efn(run_id, step, tg)
+                    if not ok:                       # 未就绪 → 本轮关该源
+                        if name == "web":
+                            web_off = True
+                        elif name == "xhs":
+                            xhs_off = True
+                        elif name == "douyin":
+                            dy_off = True
+                        elif name == "event":
+                            event_off = True
+                        else:
+                            enrich_off = True
+                        break
+                    hit = True
+                    break
+                if hit:
+                    await asyncio.sleep(_API_PACING)
+                    continue
+
+            if not grid_busy:
+                # 网格扫完且无可富集 → 驻守（不烧额度，下轮自动续，可一键停止）
+                reason = ("全城网格已扫完、知识富集也已补全，进入驻守巡检"
+                          "（不再消耗额度，新数据/工单结清后自动恢复，可停止）"
+                          if not pend else
+                          "达单进程安全上限，转驻守；下次启动自动从断点续扫")
+                await sa_set_status(run_id, "stewarding")
+                await sa_append_step(run_id, {"step": step,
+                                              "action": "steward",
+                                              "reason": reason})
+                if await _wait_steward():
+                    await sa_finish(run_id, "stopped")
+                    return
+                api_calls = 0          # 新驻守周期重置进程内计数
+                continue
+
+            # 聚焦：仍有待扫格的类里，挑当前藏品最少的（均衡推进）
+            cov = {i["cat"]: i["current"] for i in _coverage()["items"]}
+            focus = min(pend, key=lambda c: cov.get(c, 0))
+            cell = await grid_take_next(focus)
+            if not cell:
+                continue
+
+            tc = cell["typecode"]
+            page = cell["next_page"]
+            bbox = (cell["min_lng"], cell["min_lat"],
+                    cell["max_lng"], cell["max_lat"])
+            step += 1
+            try:
+                rows, raw = await asyncio.to_thread(
+                    search_polygon, tc, bbox, page, _OFFSET)
+            except Exception as e:
+                rows, raw = [], -1
+                err = str(e)[:140]
+            else:
+                err = None
+            api_calls += 1
+
+            res = await ingest_rows(rows, focus, focus) if rows else \
+                {"fetched": 0, "approved": 0, "pending": 0, "skipped": 0}
+            if err:
+                res["error"] = err
+
+            area = (f"经[{bbox[0]:.3f},{bbox[2]:.3f}] "
+                    f"纬[{bbox[1]:.3f},{bbox[3]:.3f}]")
+            # 状态机：扫尽 / 翻页 / 自适应细分(密集区 8 页仍满→四叉细分,防 POI 丢)
+            if err:
+                # 单格请求出错：不前进、不标尽，留待后续重试（不浪费已得）
+                nstatus, npage, note = "pending", page, f"请求失败重试 {err}"
+            elif raw <= 0 or raw < _OFFSET:
+                nstatus, npage, note = "exhausted", page, f"网格扫尽(本页{raw}条)"
+            elif page >= _MAX_PAGE_SPLIT and cell["depth"] < _MAX_DEPTH:
+                await grid_subdivide(cell["id"], focus, tc,
+                                     _quads(cell), cell["depth"] + 1)
+                nstatus, npage, note = None, page, "稠密→四叉细分深扫"
+            elif page >= _MAX_PAGE_SPLIT:
+                nstatus, npage, note = "exhausted", page, "达最深层，止于该格"
+            else:
+                nstatus, npage, note = "pending", page + 1, "本页满，续下一页"
+
+            if nstatus is not None:
+                await grid_update(cell["id"], npage,
+                                  res.get("approved", 0) + res.get("pending", 0),
+                                  nstatus)
+
+            reason = f"全城网格扫描 {area} 第{page}页（{focus}/{tc}）"
+            await sa_add_task(run_id, step, focus, "ingest", reason,
+                              "gaode_grid", result=res, status="done",
+                              note=f"格#{cell['id']} d{cell['depth']} p{page}·{note}")
+            await sa_append_step(run_id, {
+                "step": step,
+                "plan": {"poi_type": focus, "keyword": f"网格 {area} p{page}",
+                         "reason": reason},
+                "result": res,
+            }, ingested_delta=res.get("approved", 0))
+
+            # 该类全城扫尽却仍偏少 → 求助管理员（去重，馆长不停）
+            if focus not in escalated:
+                gc = await grid_counts()
+                g = gc.get(focus, {})
+                if g.get("total") and g["done"] >= g["total"]:
+                    maxc = max(cov.values()) if cov else 0
+                    if cov.get(focus, 0) < max(0.25 * maxc, 30):
+                        await _escalate(run_id, step, focus, cov.get(focus, 0))
+                    escalated.add(focus)
+
+            await asyncio.sleep(_API_PACING)
+    except Exception as e:  # noqa: BLE001
+        await sa_finish(run_id, "error", str(e)[:240])
+
+
+def schedule_super_agent(run_id: int) -> None:
+    t = asyncio.create_task(run_super_agent(run_id))
+    _BG.add(t)
+    t.add_done_callback(_BG.discard)