bxh/app/agents/super_orchestrator.py

"""Super Agent — 知识图谱馆长（全城网格自治采集，常驻不停）。

科学合理 & 省额度的核心：**地理网格扫描**，不再用"热度关键词"。
  • 把贵阳整个城市切成网格，逐格用高德"多边形(矩形)搜索 + 官方类型编码"
    系统性扫，每格每页都是不同地理切片 → 几乎不重复 → 不浪费 API 额度。
  • 每格扫到第几页**持久化**（gaode_grid_cells.next_page），跨步/跨轮接着扫，
    扫尽的格标记 exhausted **永不重复请求**（省额度的关键）。
  • 稠密区一格出不完 → **自适应四叉细分**，保证不漏。
  • 全城扫完不"结束"，转**驻守巡检**（不再消耗额度，仅响应停止/再播种）。
  • 进度=网格地理覆盖率，真实可反映，非拍脑袋数字。
"""
from __future__ import annotations

import asyncio
import re

from app.api.graph import _get_graph
from app.config import settings
from app.agents.gaode_connector import AMAP_TYPECODES, search_polygon
from app.agents.super_ingest import ingest_rows
from app.agents.distill_gate import distill_entity, ATTR_FIELDS
from app.agents.web_agent import web_enrich
from app.agents.xhs_agent import xhs_enrich
from app.agents.douyin_agent import douyin_enrich
from app.agents.event_miner import mine_events
from app.db import (
    sa_append_step, sa_finish, sa_stop_requested, sa_add_task, sa_set_status,
    sa_has_open_escalation, create_acquisition_task, create_notification,
    get_admin_user_id, grid_seed, grid_counts, grid_pending_cats,
    grid_take_next, grid_update, grid_subdivide,
    sa_merge_candidate_payload, sa_record_conflict, sa_record_schema_proposal,
)

_BG: set = set()

# 贵阳整市外接框 (min_lng, min_lat, max_lng, max_lat)：含主城+周边区县
_GY_BBOX = (106.20, 26.10, 107.30, 27.05)
_CELL = 0.08                 # 初始网格边长(°)，约 8km
_OFFSET = 25                 # 高德多边形单页上限
_MAX_PAGE_SPLIT = 8          # 单格翻到第N页仍满 → 太密 → 四叉细分
_MAX_DEPTH = 3               # 最深细分层级（0.08→约1km格）
_API_PACING = 0.35           # 每次高德调用间隔(秒)，护 QPS
_HARD_MAX_STEPS = 100000     # 仅极端兜底（持久化，跨轮续扫，不暴露用户）
_MAX_API_PER_RUN = 6000      # 单进程跑安全上限；命中转驻守，下轮自动续
_STEWARD_INTERVAL = 1800
_STEWARD_TICK = 10
_ENRICH_BATCH = 5            # 每步最多蒸馏富集多少个实体
_ENRICH_EVERY = 4            # 网格忙时每 N 步插一次知识富集
_WEB_BATCH = 2               # 每步最多联网富集多少个实体(真浏览器较慢)
_XHS_BATCH = 1               # 小红书浏览器很慢/脆，每步只采 1 个


def _place_counts() -> dict:
    g = _get_graph()
    out: dict = {}
    try:
        for row in g.query(
            "MATCH (p:Place) RETURN coalesce(p.place_type,'?'), count(*)"
        ).result_set:
            out[row[0]] = row[1]
    except Exception:
        pass
    return out


def _coverage() -> dict:
    from app.agents.super_ingest import _PT
    counts = _place_counts()
    cat2pt = {cat: _PT.get(cat, "poi") for cat in AMAP_TYPECODES}
    items = [{"cat": cat, "place_type": cat2pt[cat],
              "current": int(counts.get(cat2pt[cat], 0))}
             for cat in AMAP_TYPECODES]
    return {"items": items, "total": sum(i["current"] for i in items)}


def _grid_for(bbox: tuple, step: float) -> list[tuple]:
    mnlng, mnlat, mxlng, mxlat = bbox
    cells, lng = [], mnlng
    while lng < mxlng:
        lat = mnlat
        nlng = round(min(lng + step, mxlng), 6)
        while lat < mxlat:
            nlat = round(min(lat + step, mxlat), 6)
            cells.append((round(lng, 6), round(lat, 6), nlng, nlat))
            lat = nlat
        lng = nlng
    return cells


def _quads(c: dict) -> list[tuple]:
    """父格四等分为 4 个子格(矩形对半切)。"""
    mnlng, mnlat = c["min_lng"], c["min_lat"]
    mxlng, mxlat = c["max_lng"], c["max_lat"]
    midlng = round((mnlng + mxlng) / 2, 6)
    midlat = round((mnlat + mxlat) / 2, 6)
    return [
        (mnlng, mnlat, midlng, midlat), (midlng, mnlat, mxlng, midlat),
        (mnlng, midlat, midlng, mxlat), (midlng, midlat, mxlng, mxlat),
    ]


async def _seed_if_needed() -> None:
    have = await grid_counts()
    for cat, tc in AMAP_TYPECODES.items():
        if cat not in have:
            await grid_seed(cat, tc, _grid_for(_GY_BBOX, _CELL))


async def _escalate(run_id: int, step: int, cat: str, cur: int) -> None:
    if await sa_has_open_escalation(cat):
        await sa_add_task(run_id, step, cat, "escalate",
                          "全城网格已扫尽，该类仍偏少", "skip_dup",
                          status="escalated", note="已有未结工单，不重复打扰")
        return
    task_id = None
    try:
        task = await create_acquisition_task({
            "tenant_id": settings.default_tenant,
            "project_id": settings.default_project,
            "created_by": "super_agent",
            "title": f"【Super Agent 求助】「{cat}」全城网格已扫尽仍偏少",
            "description": (
                f"馆长已对贵阳全城做网格化高德采集，「{cat}」当前仅 {cur} 条，"
                f"高德官方源对该类覆盖有限。建议人工核查类型编码或改用"
                f"小红书/大众点评/官方名录等渠道补全。馆长继续驻守，"
                f"工单结清/数据增长后自动恢复。"),
            "scenario_tags": ["super_agent", "escalation", cat],
            "target_entity_types": ["Place"],
            "target_fields": [],
            "suggested_collection_method": "manual_or_alt_source",
            "priority": 1,
        })
        task_id = task["id"]
    except Exception:
        task_id = None
    try:
        uid = await get_admin_user_id()
        if uid:
            await create_notification(
                uid, title=f"Super Agent 求助：「{cat}」全城网格已扫尽",
                body=(f"「{cat}」仅 {cur} 条，高德源覆盖有限。已开工单"
                      + (f" #{task_id}" if task_id else "")
                      + "，请人工/改渠道；馆长继续驻守。"),
                ntype="task", related_task_id=task_id)
    except Exception:
        pass
    await sa_add_task(run_id, step, cat, "escalate",
                      "全城网格已扫尽，该类仍偏少", "notify_admin",
                      status="escalated", related_task_id=task_id)
    await sa_append_step(run_id, {
        "step": step, "action": "escalate", "cat": cat,
        "reason": f"「{cat}」全城网格扫尽仅{cur}条，已开工单并通知管理员，继续驻守"})


def _enrich_targets(limit: int) -> list[dict]:
    """未富集过的 Place + 锚点(名/址/区/类) + 现有软字段。

    取数显式排除 经纬度/电话（隐私红线，绝不外发给蒸馏模型）。
    """
    g = _get_graph()
    try:
        rs = g.query(
            "MATCH (p:Place) WHERE p.enrich_done IS NULL "
            "RETURN p.element_id, p.name, coalesce(p.place_type,''), "
            "coalesce(p.district,''), coalesce(p.address,''), "
            "coalesce(p.summary,''), coalesce(p.history,''), "
            "coalesce(p.features,''), coalesce(p.suitable_for,''), "
            "coalesce(p.best_season,''), coalesce(p.ticket_hint,'') "
            "LIMIT $n", {"n": limit}).result_set
    except Exception:
        return []
    out = []
    for r in rs:
        if not (r and r[1]):
            continue
        existing = {k: v for k, v in zip(
            ATTR_FIELDS, [r[5], r[6], r[7], r[8], r[9], r[10]]) if v}
        out.append({"eid": r[0], "name": r[1], "place_type": r[2],
                    "district": r[3], "address": r[4],
                    "existing": existing})
    return out


def _apply_enrich(eid: str, fields: dict) -> None:
    """共识字段写回 FalkorDB 节点；无论是否有字段都打 enrich_done 防重复空跑。"""
    g = _get_graph()
    sets = ["p.enrich_done=1"]
    params = {"eid": eid}
    for k in ATTR_FIELDS:
        if fields.get(k):
            sets.append(f"p.{k}=${k}")
            params[k] = fields[k]
    try:
        g.query(f"MATCH (p:Place {{element_id:$eid}}) SET {','.join(sets)}",
                params)
    except Exception:
        pass


async def _distill_enrich(run_id: int, step: int,
                          targets: list[dict]) -> bool:
    """工具：多模型知识蒸馏，给已有实体补"知识层"属性。

    独立数据来源（不是高德质检闸门）：问 N 个模型脑内知识 → 全局模型聚合
    跨模型共识 → 写回 FalkorDB 节点 + 候选 payload（审计可溯）。
    """
    enriched = adopt_fields = keep_total = conflict_total = uncertain_total = 0
    conflict_names: list[str] = []
    last = ""
    for t in targets:
        res = await distill_entity(t)
        last = res.get("summary", "")
        if not res.get("ok"):
            # 配置/连通问题：整批中止——不打标记(待配置后重试)、不空转
            await sa_add_task(
                run_id, step, "蒸馏", "distill",
                f"知识蒸馏未就绪：{last}", "distill_enrich",
                result={"fetched": 0, "approved": 0, "pending": 0,
                        "skipped": 0}, status="skipped", note=last)
            await sa_append_step(run_id, {
                "step": step, "action": "distill",
                "reason": f"知识蒸馏未配置/不可用（{last}），本步跳过，"
                          f"配好≥2蒸馏模型+全局聚合后自动恢复"})
            return False

        adopt = res.get("adopt") or {}
        _apply_enrich(t["eid"], adopt)        # 只写 adopt；无则仅打标记防空转
        if adopt:
            await sa_merge_candidate_payload(t["eid"], adopt)
            enriched += 1
            adopt_fields += len(adopt)
        keep_total += len(res.get("keep") or [])
        uncertain_total += len(res.get("uncertain") or [])

        # 与图谱既有值矛盾 → 不覆盖，落 validation_issues 转人工
        for c in (res.get("conflict") or []):
            await sa_record_conflict(
                t["eid"], c.get("field", ""), c.get("existing", ""),
                c.get("distilled", ""), c.get("note", ""))
            conflict_total += 1
            if t.get("name"):
                conflict_names.append(f"{t['name']}·{c.get('field','')}")

    res_obj = {"fetched": len(targets), "approved": adopt_fields,
               "pending": conflict_total, "skipped": uncertain_total}
    await sa_add_task(
        run_id, step, "蒸馏", "distill",
        f"多模型知识蒸馏富集 {len(targets)} 个实体（{last}）",
        "distill_enrich", result=res_obj, status="done",
        note=f"富集{enriched}实体/{adopt_fields}字段·一致{keep_total}"
             f"·矛盾{conflict_total}·存疑{uncertain_total}")
    await sa_append_step(run_id, {
        "step": step, "action": "distill",
        "reason": f"知识蒸馏富集：{enriched}/{len(targets)} 实体补 {adopt_fields} 字段，"
                  f"矛盾 {conflict_total} 转人工（{last}）"})

    if conflict_total:
        try:
            uid = await get_admin_user_id()
            if uid:
                await create_notification(
                    uid,
                    title=f"蒸馏发现 {conflict_total} 处与图谱既有数据矛盾",
                    body=("需人工裁决（蒸馏未覆盖图谱）："
                          + "、".join(conflict_names[:8])
                          + ("…" if len(conflict_names) > 8 else "")
                          + "。详见 数据质量 / 校验问题(distill_conflict)。"),
                    ntype="task")
        except Exception:
            pass
    return True


def _web_targets(limit: int) -> list[dict]:
    """未联网富集过的 Place（无 web_done）+ 锚点 + 现有软字段。
    取数显式排除 经纬度/电话（隐私红线）。"""
    g = _get_graph()
    try:
        # 景点最可能有公开百科页 → 优先联网富集，命中率最高
        rs = g.query(
            "MATCH (p:Place) WHERE p.web_done IS NULL "
            "RETURN p.element_id, p.name, coalesce(p.place_type,''), "
            "coalesce(p.district,''), coalesce(p.address,''), "
            "coalesce(p.summary,''), coalesce(p.history,''), "
            "coalesce(p.features,''), coalesce(p.suitable_for,''), "
            "coalesce(p.best_season,''), coalesce(p.ticket_hint,'') "
            "ORDER BY CASE WHEN p.place_type='sight' THEN 0 ELSE 1 END, "
            "p.element_id LIMIT $n", {"n": limit}).result_set
    except Exception:
        return []
    out = []
    for r in rs:
        if not (r and r[1]):
            continue
        existing = {k: v for k, v in zip(
            ATTR_FIELDS, [r[5], r[6], r[7], r[8], r[9], r[10]]) if v}
        out.append({"eid": r[0], "name": r[1], "place_type": r[2],
                    "district": r[3], "address": r[4], "existing": existing})
    return out


def _apply_web(eid: str, adopt: dict) -> None:
    """网页采纳字段写回；打 web_done(必)；有 adopt 则连 enrich_done 一并打
    (网页权威，省一次记忆蒸馏)。"""
    g = _get_graph()
    sets = ["p.web_done=1"]
    params = {"eid": eid}
    if adopt:
        sets.append("p.enrich_done=1")
    for k in ATTR_FIELDS:
        if adopt.get(k):
            sets.append(f"p.{k}=${k}")
            params[k] = adopt[k]
    try:
        g.query(f"MATCH (p:Place {{element_id:$eid}}) SET {','.join(sets)}",
                params)
    except Exception:
        pass


async def _web_enrich(run_id: int, step: int, targets: list[dict]) -> bool:
    """工具：browser-use 式联网采集（真浏览器抓权威页 → opus 抽取对齐）。"""
    enriched = adopt_fields = found = conflict_total = gap_total = 0
    last = ""
    for t in targets:
        r = await web_enrich(t)
        last = r.get("summary", "")
        if not r.get("ok"):
            await sa_add_task(run_id, step, "联网", "web",
                              f"web_agent 未就绪：{last}", "web_agent",
                              result={"fetched": 0, "approved": 0,
                                      "pending": 0, "skipped": 0},
                              status="skipped", note=last)
            await sa_append_step(run_id, {
                "step": step, "action": "web",
                "reason": f"web_agent 未配置/不可用（{last}），跳过待恢复"})
            return False
        if not r.get("found") or r.get("entity_match") is False:
            _apply_web(t["eid"], {})           # 标记 web_done，不再重复抓
            continue
        found += 1
        adopt = r.get("adopt") or {}
        _apply_web(t["eid"], adopt)
        if adopt:
            await sa_merge_candidate_payload(t["eid"], adopt)
            enriched += 1
            adopt_fields += len(adopt)
        for c in (r.get("conflict") or []):
            await sa_record_conflict(t["eid"], c.get("field", ""),
                                     c.get("existing", ""),
                                     c.get("web", ""), c.get("note", ""))
            conflict_total += 1
        for sgap in (r.get("schema_gaps") or []):
            iid = await sa_record_schema_proposal(
                sgap.get("attr", ""), sgap.get("field", "")
                or re.sub(r"\W+", "_", sgap.get("attr", "")).strip("_"),
                str(sgap.get("value", ""))[:200],
                f"web_agent 在「{t['name']}」网页发现：{sgap.get('why','')}",
                float(r.get("confidence") or 0.7))
            if iid:
                gap_total += 1

    res_obj = {"fetched": len(targets), "approved": adopt_fields,
               "pending": conflict_total, "skipped": gap_total}
    await sa_add_task(
        run_id, step, "联网", "web",
        f"browser-use 联网采集 {len(targets)} 个实体（{last}）",
        "web_agent", result=res_obj, status="done",
        note=f"命中{found}·补{adopt_fields}字段·矛盾{conflict_total}"
             f"·schema提案{gap_total}")
    await sa_append_step(run_id, {
        "step": step, "action": "web",
        "reason": f"联网采集：命中 {found}/{len(targets)}，补 {adopt_fields} 字段，"
                  f"矛盾 {conflict_total} 转人工，schema 提案 {gap_total}（{last}）"})
    if conflict_total or gap_total:
        try:
            uid = await get_admin_user_id()
            if uid:
                await create_notification(
                    uid,
                    title=f"web_agent：{conflict_total} 矛盾 / {gap_total} schema 提案待裁决",
                    body="联网采集与图谱矛盾或发现新属性，详见 数据质量/校验问题 "
                         "与 本体建模/字段提案。",
                    ntype="task")
        except Exception:
            pass
    return True


def _xhs_targets(limit: int) -> list[dict]:
    """未采过小红书(无 xhs_done)的 Place；美食最优先(小红书食/玩为主)。"""
    g = _get_graph()
    try:
        rs = g.query(
            "MATCH (p:Place) WHERE p.xhs_done IS NULL "
            "RETURN p.element_id, p.name, coalesce(p.place_type,''), "
            "coalesce(p.district,'') "
            "ORDER BY CASE WHEN p.place_type='eat' THEN 0 "
            "WHEN p.place_type='sight' THEN 1 ELSE 2 END, p.element_id "
            "LIMIT $n", {"n": limit}).result_set
    except Exception:
        return []
    return [{"eid": r[0], "name": r[1], "place_type": r[2], "district": r[3]}
            for r in rs if r and r[1]]


def _apply_xhs(eid: str, tags: list[str]) -> None:
    """体验标签写回：MERGE ExperienceTag + (Place)-[:HAS_TAG]->(tag)；
    无论有无标签都打 xhs_done 防重复。"""
    g = _get_graph()
    try:
        g.query("MATCH (p:Place {element_id:$eid}) SET p.xhs_done=1",
                {"eid": eid})
        for t in tags:
            g.query(
                "MATCH (p:Place {element_id:$eid}) "
                "MERGE (e:ExperienceTag {name:$t}) "
                "MERGE (p)-[:HAS_TAG]->(e) SET e.source='xiaohongshu'",
                {"eid": eid, "t": t})
    except Exception:
        pass


async def _xhs_enrich(run_id: int, step: int, targets: list[dict]) -> bool:
    """工具：小红书 UGC → 体验标签。未登录→升级人工(一次性登录)，本轮关闭。"""
    done = tags_total = found = ev_total = 0
    last = ""
    for t in targets:
        r = await xhs_enrich(t)
        last = r.get("summary", "")
        if not r.get("ok"):
            await sa_add_task(run_id, step, "小红书", "xhs",
                              f"xhs_agent 未就绪：{last}", "xhs_agent",
                              result={"fetched": 0, "approved": 0,
                                      "pending": 0, "skipped": 0},
                              status="skipped", note=last)
            await sa_append_step(run_id, {"step": step, "action": "xhs",
                                          "reason": f"小红书未配置/停用（{last}）"})
            return False
        if r.get("need_login"):
            if not await sa_has_open_escalation("小红书登录"):
                try:
                    task = await create_acquisition_task({
                        "tenant_id": settings.default_tenant,
                        "project_id": settings.default_project,
                        "created_by": "super_agent",
                        "title": "【Super Agent 求助】小红书需一次性人工登录",
                        "description": "后台 xhs_agent 检测到未登录。请在项目根目录运行 "
                                       "`python3 scripts/xhs_login.py`，弹出浏览器里登录"
                                       "小红书一次；cookie 持久化后馆长自动恢复采集。",
                        "scenario_tags": ["super_agent", "escalation", "小红书登录"],
                        "target_entity_types": ["Place"], "target_fields": [],
                        "suggested_collection_method": "manual_login",
                        "priority": 1})
                    uid = await get_admin_user_id()
                    if uid:
                        await create_notification(
                            uid, title="小红书需一次性登录",
                            body="运行 scripts/xhs_login.py 登录一次即可，馆长自动恢复。",
                            ntype="task", related_task_id=task["id"])
                except Exception:
                    pass
            await sa_add_task(run_id, step, "小红书", "xhs",
                              "小红书未登录，已升级人工一次性登录", "notify_admin",
                              result={"fetched": 0, "approved": 0,
                                      "pending": 0, "skipped": 0},
                              status="escalated", note="待 scripts/xhs_login.py")
            await sa_append_step(run_id, {
                "step": step, "action": "xhs",
                "reason": "小红书未登录 → 已开工单+通知(运行 xhs_login.py)，本轮暂停小红书"})
            return False
        ev_total += int(r.get("evidence_saved") or 0)
        if r.get("found"):
            _apply_xhs(t["eid"], r.get("tags") or [])
            found += 1
            if r.get("tags"):
                done += 1
                tags_total += len(r["tags"])
        else:
            _apply_xhs(t["eid"], [])           # 标记，避免重复
    res_obj = {"fetched": len(targets), "approved": tags_total,
               "pending": ev_total, "skipped": len(targets) - found}
    await sa_add_task(run_id, step, "小红书", "xhs",
                      f"小红书 UGC 采集 {len(targets)} 个实体（{last}）",
                      "xhs_agent", result=res_obj, status="done",
                      note=f"命中{found}·证据{ev_total}条·体验标签{tags_total}")
    await sa_append_step(run_id, {
        "step": step, "action": "xhs",
        "reason": f"小红书采集：命中 {found}/{len(targets)}，证据入库 {ev_total} 条，"
                  f"产出 {tags_total} 个体验标签（{last}）"})
    return True


def _dy_targets(limit: int) -> list[dict]:
    """未采过抖音(无 dy_done)的 Place；美食/景点优先(抖音食玩为主)。"""
    g = _get_graph()
    try:
        rs = g.query(
            "MATCH (p:Place) WHERE p.dy_done IS NULL "
            "RETURN p.element_id, p.name, coalesce(p.place_type,''), "
            "coalesce(p.district,'') "
            "ORDER BY CASE WHEN p.place_type='eat' THEN 0 "
            "WHEN p.place_type='sight' THEN 1 ELSE 2 END, p.element_id "
            "LIMIT $n", {"n": limit}).result_set
    except Exception:
        return []
    return [{"eid": r[0], "name": r[1], "place_type": r[2], "district": r[3]}
            for r in rs if r and r[1]]


def _apply_dy(eid: str, tags: list[str]) -> None:
    g = _get_graph()
    try:
        g.query("MATCH (p:Place {element_id:$eid}) SET p.dy_done=1",
                {"eid": eid})
        for t in tags:
            g.query(
                "MATCH (p:Place {element_id:$eid}) "
                "MERGE (e:ExperienceTag {name:$t}) "
                "MERGE (p)-[:HAS_TAG]->(e) SET e.source='douyin'",
                {"eid": eid, "t": t})
    except Exception:
        pass


async def _douyin_enrich(run_id: int, step: int,
                         targets: list[dict]) -> bool:
    """工具：抖音 UGC → 证据层+体验标签。未登录→升级一次性登录，本轮关闭。"""
    done = tags_total = found = ev_total = 0
    last = ""
    for t in targets:
        r = await douyin_enrich(t)
        last = r.get("summary", "")
        if not r.get("ok"):
            await sa_add_task(run_id, step, "抖音", "douyin",
                              f"douyin_agent 未就绪：{last}", "douyin_agent",
                              result={"fetched": 0, "approved": 0,
                                      "pending": 0, "skipped": 0},
                              status="skipped", note=last)
            await sa_append_step(run_id, {"step": step, "action": "douyin",
                                          "reason": f"抖音未配置/停用（{last}）"})
            return False
        if r.get("need_login"):
            if not await sa_has_open_escalation("抖音登录"):
                try:
                    task = await create_acquisition_task({
                        "tenant_id": settings.default_tenant,
                        "project_id": settings.default_project,
                        "created_by": "super_agent",
                        "title": "【Super Agent 求助】抖音需一次性人工登录",
                        "description": "后台 douyin_agent 检测到未登录。请在项目根目录"
                                       "运行 `python3 scripts/douyin_login.py`，弹出"
                                       "浏览器里登录抖音一次；cookie 持久化后自动恢复。",
                        "scenario_tags": ["super_agent", "escalation",
                                          "抖音登录"],
                        "target_entity_types": ["Place"], "target_fields": [],
                        "suggested_collection_method": "manual_login",
                        "priority": 1})
                    uid = await get_admin_user_id()
                    if uid:
                        await create_notification(
                            uid, title="抖音需一次性登录",
                            body="运行 scripts/douyin_login.py 登录一次即可，"
                                 "馆长自动恢复。",
                            ntype="task", related_task_id=task["id"])
                except Exception:
                    pass
            await sa_add_task(run_id, step, "抖音", "douyin",
                              "抖音未登录，已升级人工一次性登录", "notify_admin",
                              result={"fetched": 0, "approved": 0,
                                      "pending": 0, "skipped": 0},
                              status="escalated",
                              note="待 scripts/douyin_login.py")
            await sa_append_step(run_id, {
                "step": step, "action": "douyin",
                "reason": "抖音未登录 → 已开工单+通知(运行 douyin_login.py)，"
                          "本轮暂停抖音"})
            return False
        ev_total += int(r.get("evidence_saved") or 0)
        if r.get("found"):
            _apply_dy(t["eid"], r.get("tags") or [])
            found += 1
            if r.get("tags"):
                done += 1
                tags_total += len(r["tags"])
        else:
            _apply_dy(t["eid"], [])
    res_obj = {"fetched": len(targets), "approved": tags_total,
               "pending": ev_total, "skipped": len(targets) - found}
    await sa_add_task(run_id, step, "抖音", "douyin",
                      f"抖音 UGC 采集 {len(targets)} 个实体（{last}）",
                      "douyin_agent", result=res_obj, status="done",
                      note=f"命中{found}·证据{ev_total}条·体验标签{tags_total}")
    await sa_append_step(run_id, {
        "step": step, "action": "douyin",
        "reason": f"抖音采集：命中 {found}/{len(targets)}，证据入库 {ev_total} 条，"
                  f"产出 {tags_total} 个体验标签（{last}）"})
    return True


def _events_targets(limit: int) -> list[dict]:
    """已采过任何源(web/xhs/dy) 但还没抽过事件(无 events_done)的 Place。

    平台无关：只要 social_evidence 里有该 pnk 的证据(任意 platform)，
    都可以挖事件,不再硬要求 xhs_done。
    """
    g = _get_graph()
    try:
        rs = g.query(
            "MATCH (p:Place) "
            "WHERE (p.web_done=1 OR p.xhs_done=1 OR p.dy_done=1) "
            "  AND p.events_done IS NULL "
            "RETURN p.element_id, p.name LIMIT $n", {"n": limit}).result_set
    except Exception:
        return []
    return [{"eid": r[0], "name": r[1]} for r in rs if r and r[1]]


def _apply_events(eid: str, events: list[dict]) -> None:
    """事件写回：MERGE Event + (Place)-[:HAS_EVENT{time,type}]->(Event)。

    - source 不再硬编 'xiaohongshu',按事件实际来源(baike/wiki/xhs/douyin)写
    - 加 time_norm(排序用) / participants(涉及人物) / confidence(置信度)
    - MERGE 用 (place,title) 幂等,同名事件不会重复
    - 无论有无都打 events_done 防重复整批
    """
    g = _get_graph()
    try:
        g.query("MATCH (p:Place {element_id:$eid}) SET p.events_done=1",
                {"eid": eid})
        for e in events:
            g.query(
                "MATCH (p:Place {element_id:$eid}) "
                "MERGE (ev:Event {place:$eid, title:$t}) "
                "SET ev.time=$tm, ev.time_norm=$tn, ev.type=$ty, "
                "    ev.desc=$d, ev.source=$src, ev.participants=$ppl, "
                "    ev.confidence=$conf "
                "MERGE (p)-[:HAS_EVENT {time:$tn, type:$ty}]->(ev)",
                {"eid": eid, "t": e.get("title", ""),
                 "tm": e.get("time", ""),
                 "tn": e.get("time_norm", "") or e.get("time", ""),
                 "ty": e.get("type", ""), "d": e.get("desc", ""),
                 "src": e.get("source_platform", "") or "mixed",
                 "ppl": ",".join(e.get("participants") or []),
                 "conf": float(e.get("confidence") or 0)})
    except Exception:
        pass


async def _event_mine(run_id: int, step: int, targets: list[dict]) -> bool:
    """工具：从证据层评论/帖子抽时间锚定事件(纯 LLM，不爬网)。"""
    done = ev_total = 0
    last = ""
    for t in targets:
        r = await mine_events(t)
        last = r.get("summary", "")
        if not r.get("ok"):
            await sa_add_task(run_id, step, "事件", "event",
                              f"event_miner 未就绪：{last}", "event_miner",
                              result={"fetched": 0, "approved": 0,
                                      "pending": 0, "skipped": 0},
                              status="skipped", note=last)
            await sa_append_step(run_id, {"step": step, "action": "event",
                                          "reason": f"事件抽取未配置（{last}）"})
            return False
        evs = r.get("events") or []
        _apply_events(t["eid"], evs)
        if evs:
            done += 1
            ev_total += len(evs)
    res_obj = {"fetched": len(targets), "approved": ev_total,
               "pending": 0, "skipped": len(targets) - done}
    await sa_add_task(run_id, step, "事件", "event",
                      f"评论时间→事件抽取 {len(targets)} 个实体（{last}）",
                      "event_miner", result=res_obj, status="done",
                      note=f"命中{done}·事件{ev_total}")
    await sa_append_step(run_id, {
        "step": step, "action": "event",
        "reason": f"事件抽取：{done}/{len(targets)} 实体，"
                  f"产出 {ev_total} 个时间锚定事件（{last}）"})
    return True


async def run_super_agent(run_id: int) -> None:
    await _seed_if_needed()
    step = 0
    api_calls = 0
    enrich_off = False           # 记忆蒸馏未配置/不可用时本轮关闭
    web_off = False              # web_agent 未就绪时本轮关闭，避免空转
    xhs_off = False              # 小红书未配置/未登录时本轮关闭
    dy_off = False               # 抖音未配置/未登录时本轮关闭
    event_off = False            # 事件抽取未配置时本轮关闭
    enrich_turn = 0              # 富集源轮转(web/xhs/douyin/event/distill 公平)
    escalated: set[str] = set()

    async def _wait_steward() -> bool:
        waited = 0
        while waited < _STEWARD_INTERVAL:
            if await sa_stop_requested(run_id):
                return True
            await asyncio.sleep(_STEWARD_TICK)
            waited += _STEWARD_TICK
        return False

    try:
        while True:
            if await sa_stop_requested(run_id):
                await sa_finish(run_id, "stopped")
                return

            # ── 指挥大脑选工具 ───────────────────────────────────────
            #  高德网格法 = 主力快采（最快、结构化、带坐标）；
            #  多模型蒸馏 = 知识富集（补高德给不了的知识层，独立来源）。
            #  网格忙时每 _ENRICH_EVERY 步插一次富集；网格扫完后全力富集。
            pend = await grid_pending_cats()
            grid_busy = (bool(pend) and api_calls < _MAX_API_PER_RUN
                         and step < _HARD_MAX_STEPS)

            #  富集插槽：web/小红书/抖音/事件/记忆蒸馏 **轮转调度**
            #  (避免某源目标海量把其它源饿死，多源公平持续推进)
            if ((not web_off or not xhs_off or not dy_off or not event_off
                 or not enrich_off)
                    and ((not grid_busy)
                         or (step > 0 and step % _ENRICH_EVERY == 0))):
                pool = ([("web", _web_targets, _WEB_BATCH, _web_enrich)]
                        if not web_off else [])
                pool += ([("xhs", _xhs_targets, _XHS_BATCH, _xhs_enrich)]
                         if not xhs_off else [])
                pool += ([("douyin", _dy_targets, 1, _douyin_enrich)]
                         if not dy_off else [])
                pool += ([("event", _events_targets, 3, _event_mine)]
                         if not event_off else [])
                pool += ([("distill", _enrich_targets, _ENRICH_BATCH,
                           _distill_enrich)] if not enrich_off else [])
                hit = False
                for i in range(len(pool)):
                    name, tfn, batch, efn = pool[(enrich_turn + i) % len(pool)]
                    tg = await asyncio.to_thread(tfn, batch)
                    if not tg:
                        continue
                    enrich_turn += 1
                    step += 1
                    ok = await efn(run_id, step, tg)
                    if not ok:                       # 未就绪 → 本轮关该源
                        if name == "web":
                            web_off = True
                        elif name == "xhs":
                            xhs_off = True
                        elif name == "douyin":
                            dy_off = True
                        elif name == "event":
                            event_off = True
                        else:
                            enrich_off = True
                        break
                    hit = True
                    break
                if hit:
                    await asyncio.sleep(_API_PACING)
                    continue

            if not grid_busy:
                # 网格扫完且无可富集 → 驻守（不烧额度，下轮自动续，可一键停止）
                reason = ("全城网格已扫完、知识富集也已补全，进入驻守巡检"
                          "（不再消耗额度，新数据/工单结清后自动恢复，可停止）"
                          if not pend else
                          "达单进程安全上限，转驻守；下次启动自动从断点续扫")
                await sa_set_status(run_id, "stewarding")
                await sa_append_step(run_id, {"step": step,
                                              "action": "steward",
                                              "reason": reason})
                if await _wait_steward():
                    await sa_finish(run_id, "stopped")
                    return
                api_calls = 0          # 新驻守周期重置进程内计数
                continue

            # 聚焦：仍有待扫格的类里，挑当前藏品最少的（均衡推进）
            cov = {i["cat"]: i["current"] for i in _coverage()["items"]}
            focus = min(pend, key=lambda c: cov.get(c, 0))
            cell = await grid_take_next(focus)
            if not cell:
                continue

            tc = cell["typecode"]
            page = cell["next_page"]
            bbox = (cell["min_lng"], cell["min_lat"],
                    cell["max_lng"], cell["max_lat"])
            step += 1
            try:
                rows, raw = await asyncio.to_thread(
                    search_polygon, tc, bbox, page, _OFFSET)
            except Exception as e:
                rows, raw = [], -1
                err = str(e)[:140]
            else:
                err = None
            api_calls += 1

            res = await ingest_rows(rows, focus, focus) if rows else \
                {"fetched": 0, "approved": 0, "pending": 0, "skipped": 0}
            if err:
                res["error"] = err

            area = (f"经[{bbox[0]:.3f},{bbox[2]:.3f}] "
                    f"纬[{bbox[1]:.3f},{bbox[3]:.3f}]")
            # 状态机：扫尽 / 翻页 / 自适应细分(密集区 8 页仍满→四叉细分,防 POI 丢)
            if err:
                # 单格请求出错：不前进、不标尽，留待后续重试（不浪费已得）
                nstatus, npage, note = "pending", page, f"请求失败重试 {err}"
            elif raw <= 0 or raw < _OFFSET:
                nstatus, npage, note = "exhausted", page, f"网格扫尽(本页{raw}条)"
            elif page >= _MAX_PAGE_SPLIT and cell["depth"] < _MAX_DEPTH:
                await grid_subdivide(cell["id"], focus, tc,
                                     _quads(cell), cell["depth"] + 1)
                nstatus, npage, note = None, page, "稠密→四叉细分深扫"
            elif page >= _MAX_PAGE_SPLIT:
                nstatus, npage, note = "exhausted", page, "达最深层，止于该格"
            else:
                nstatus, npage, note = "pending", page + 1, "本页满，续下一页"

            if nstatus is not None:
                await grid_update(cell["id"], npage,
                                  res.get("approved", 0) + res.get("pending", 0),
                                  nstatus)

            reason = f"全城网格扫描 {area} 第{page}页（{focus}/{tc}）"
            await sa_add_task(run_id, step, focus, "ingest", reason,
                              "gaode_grid", result=res, status="done",
                              note=f"格#{cell['id']} d{cell['depth']} p{page}·{note}")
            await sa_append_step(run_id, {
                "step": step,
                "plan": {"poi_type": focus, "keyword": f"网格 {area} p{page}",
                         "reason": reason},
                "result": res,
            }, ingested_delta=res.get("approved", 0))

            # 该类全城扫尽却仍偏少 → 求助管理员（去重，馆长不停）
            if focus not in escalated:
                gc = await grid_counts()
                g = gc.get(focus, {})
                if g.get("total") and g["done"] >= g["total"]:
                    maxc = max(cov.values()) if cov else 0
                    if cov.get(focus, 0) < max(0.25 * maxc, 30):
                        await _escalate(run_id, step, focus, cov.get(focus, 0))
                    escalated.add(focus)

            await asyncio.sleep(_API_PACING)
    except Exception as e:  # noqa: BLE001
        await sa_finish(run_id, "error", str(e)[:240])


def schedule_super_agent(run_id: int) -> None:
    t = asyncio.create_task(run_super_agent(run_id))
    _BG.add(t)
    t.add_done_callback(_BG.discard)