bxh/app/agents/douyin_agent.py

"""抖音子 Agent（三期，移植自 douyin_visible_probe 的成熟链路）。

核心修复:之前 comment=0 的原因是没走通"进详情 → 点全屏 → 显式开评论 →
滚入视口 → 找到真正可滚的元素 → 听 /comment API + DOM 双轨"。本版本把
probe 已经跑通的所有技巧搬进来,自治版同样能拉到全量评论。

外部接口保持不变:
  - `douyin_enrich(entity) -> dict`  (供编排器 _douyin_enrich)
  - `DY_PROFILE_DIR`, `_unlock`      (供 scripts/douyin_login.py)
对内做法升级:
  - listen 同时记 url+status+body(评论可借 url 回填 note_id)
  - 解析用 iter_lists_by_key 深递归,适配 douyin JSON 嵌套多变
  - 详情页:click_or_goto + 全屏按钮 + 显式点开评论 + 滚入视口 + 智能滚动
  - 终止条件:API 增长 + DOM 项 + "暂时没有更多评论"任一触发
"""
from __future__ import annotations

import asyncio
import json
import os
import random
import time
from urllib.parse import parse_qs, quote, urlparse

from app.config import settings
from app.db import get_agent_settings, sa_save_evidence
from app.llm_client import LlmClient

# ── 路径 / 端点 ─────────────────────────────────────────────────────────
DY_PROFILE_DIR = os.path.expanduser("~/.zn-kg/douyin-profile")
_SEARCH = "https://www.douyin.com/search/{kw}?type=general"

# 监听:解析与计数走这里(精准)
_LISTEN = (
    "/aweme/v1/web/aweme/detail",
    "/aweme/v1/web/comment/list",
    "/aweme/v1/web/comment/list/reply",
    "/aweme/v1/web/general/search/stream",
    "/aweme/v1/web/general/search/single",
    "/aweme/v1/web/search/item",
    "/aweme/v1/web/general/search",
)
# 诊断:URL 记录走这里(更宽,排错可视)
_DIAG = ("https://www.douyin.com/aweme/v1/web/", "https://www.douyin.com/search/")

_COMMENT_SCROLLERS = (
    "[data-e2e='comment-list']",
    "[data-e2e='detail-comment']",
    ".comment-list",
    ".comment-mainContent",
    ".ESlRWJ2j",
)
_DOM_COMMENT_ITEM = "[data-e2e='comment-item']"

# ── 调参(全为模块常量,后续要调直接改) ──────────────────────────────────
_DEEP_NOTES = 3                  # 单次最多深采几个视频
_SEARCH_SCROLLS = 6              # 搜索页最多滚几次
_MAX_SEARCH_LINKS = 40
_SEARCH_WAIT_MS = 4500           # 搜索页首屏等
_SEARCH_API_WAIT_MS = 12000      # 等搜索 API 出货
_DETAIL_WAIT_MS = 3800           # 进详情后给页面布局时间
_FULLSCREEN_WAIT_MS = 1800
_COMMENT_WAIT_MS = 3500
_COMMENT_SCROLLS = 30
_DEEP_STALL = 4                  # 连续 N 次评论无新增即停

# ── 提示词 ─────────────────────────────────────────────────────────────
_TAB_SYS = """你在抖音搜索结果页选 tab。给业务需求 + 当前可选 tab 列表,
选最相关的一个 tab 名;拿不准选「综合」。
只输出 JSON:{"tab":"tab名","reason":"一句话"}"""

_TAG_SYS = """你从抖音 UGC(视频标题/文案/评论)提炼某地点的"体验标签"。
只留多条相互印证、对该地点有信息量的短标签(如:酸汤鱼必点、排队久、人均50、
网红打卡、本地人推荐、环境一般)。广告/无关/噪声一律丢弃。
只输出 JSON:{"tags":["标签1"],"sentiment":"正面|中性|负面|混合","summary":"一句话口碑"}"""


# ── 通用工具 ────────────────────────────────────────────────────────────
def _unlock(profile_dir: str = DY_PROFILE_DIR) -> None:
    """清掉上次被 kill 的 Chromium Singleton 死锁。"""
    for name in ("SingletonLock", "SingletonCookie", "SingletonSocket"):
        try:
            os.unlink(os.path.join(profile_dir, name))
        except OSError:
            pass


def _scalar(obj, *keys, default=""):
    if not isinstance(obj, dict):
        return default
    for k in keys:
        v = obj.get(k)
        if v not in (None, "", [], {}):
            return v
    return default


def _first_avatar(user) -> str:
    if not isinstance(user, dict):
        return ""
    av = user.get("avatar_thumb") or user.get("avatar_medium") or {}
    urls = av.get("url_list") if isinstance(av, dict) else None
    if isinstance(urls, list) and urls:
        return str(urls[0] or "")
    return ""


def _as_int(value) -> int:
    try:
        if value in (None, ""):
            return 0
        return int(value)
    except (TypeError, ValueError):
        return 0


def _body_of(entry):
    if isinstance(entry, dict) and "__body" in entry:
        return entry.get("__body")
    return entry


def _url_of(entry) -> str:
    if isinstance(entry, dict):
        return str(entry.get("__url") or "")
    return ""


def _aweme_id_from_url(url: str) -> str:
    try:
        qs = parse_qs(urlparse(url).query)
    except Exception:
        return ""
    for k in ("aweme_id", "item_id", "group_id"):
        v = qs.get(k)
        if v:
            return str(v[0] or "")
    return ""


def _iter_lists_by_key(obj, keys: set):
    """深递归找到所有匹配 key 的 list 节点(适配 douyin 多变嵌套)。"""
    if isinstance(obj, dict):
        for k, v in obj.items():
            if k in keys and isinstance(v, list):
                yield v
            elif isinstance(v, (dict, list)):
                yield from _iter_lists_by_key(v, keys)
    elif isinstance(obj, list):
        for it in obj:
            yield from _iter_lists_by_key(it, keys)


# ── 解析(深递归+多路径) ────────────────────────────────────────────────
def _parse_dy_notes(raw_api: list, name: str, keyword: str) -> list[dict]:
    out, seen = [], set()
    for entry in raw_api or []:
        body = _body_of(entry)
        if not isinstance(body, dict):
            continue
        cands: list = []
        data = body.get("data")
        d_dict = data if isinstance(data, dict) else {}
        for v in (
            data if isinstance(data, list) else None,
            body.get("aweme_detail"),
            body.get("aweme_list"),
            d_dict.get("aweme_detail"),
            d_dict.get("aweme_list"),
            d_dict.get("data"),
        ):
            if isinstance(v, dict):
                cands.append(v)
            elif isinstance(v, list):
                cands.extend(v)
        for nested in _iter_lists_by_key(body, {"aweme_list"}):
            cands.extend(nested)

        for item in cands:
            if not isinstance(item, dict):
                continue
            aw = item.get("aweme_info") or item.get("aweme") or item
            if not isinstance(aw, dict):
                continue
            aid = _scalar(aw, "aweme_id", "group_id", "awemeId")
            if not aid or str(aid) in seen:
                continue
            seen.add(str(aid))
            au = aw.get("author") or {}
            st = aw.get("statistics") or {}
            out.append({
                "platform": "douyin", "kind": "note", "source_id": str(aid),
                "url": f"https://www.douyin.com/video/{aid}",
                "entity_name": name, "keyword": keyword,
                "title": _scalar(aw, "desc", "title", "caption"),
                "content": _scalar(aw, "desc", "content"),
                "author": _scalar(au, "nickname", "name"),
                "author_id": str(_scalar(au, "uid", "sec_uid", "unique_id")),
                "author_avatar": _first_avatar(au),
                "likes": _as_int(_scalar(st, "digg_count", "admire_count",
                                         default=0)),
                "comments": _as_int(_scalar(st, "comment_count", default=0)),
                "collects": _as_int(_scalar(st, "collect_count",
                                            "favorite_count",
                                            "collects_count", default=0)),
                "shares": _as_int(_scalar(st, "share_count",
                                          "share_count_reflow", default=0)),
                "publish_time": str(_scalar(aw, "create_time", default="")),
                "location": "", "tags": [], "image_urls": [], "raw": item,
            })
    return out


def _parse_dy_comments(raw_api: list, name: str, keyword: str) -> list[dict]:
    out, seen = [], set()
    for entry in raw_api or []:
        body = _body_of(entry)
        if not isinstance(body, dict):
            continue
        # 评论缺 aweme_id 时,可从响应 URL 的 query 兜回填
        fallback_aid = _aweme_id_from_url(_url_of(entry))
        for comments in _iter_lists_by_key(
                body, {"comments", "comment_list", "reply_comments"}):
            for cm in comments:
                if not isinstance(cm, dict):
                    continue
                cid = _scalar(cm, "cid", "comment_id", "id")
                text = _scalar(cm, "text", "content")
                if not cid or not text or str(cid) in seen:
                    continue
                seen.add(str(cid))
                u = cm.get("user") or cm.get("user_info") or {}
                aid = _scalar(cm, "aweme_id", default=fallback_aid)
                replies = _scalar(cm, "reply_comment_total",
                                  "reply_total", default=0)
                out.append({
                    "platform": "douyin", "kind": "comment",
                    "source_id": str(cid),
                    "url": (f"https://www.douyin.com/video/{aid}"
                            if aid else ""),
                    "entity_name": name, "keyword": keyword,
                    "title": "", "content": text,
                    "author": _scalar(u, "nickname", "name"),
                    "author_id": str(_scalar(u, "uid", "sec_uid")),
                    "author_avatar": _first_avatar(u),
                    "likes": _as_int(_scalar(cm, "digg_count", "like_count",
                                             default=0)),
                    "comments": _as_int(replies),
                    "collects": 0, "shares": 0,
                    "publish_time": str(_scalar(cm, "create_time",
                                                default="")),
                    "location": _scalar(cm, "ip_label", default=""),
                    "tags": [], "image_urls": [], "raw": cm,
                })
    return out


def _comment_count_from_raw(raw_api: list) -> int:
    n = 0
    for entry in raw_api or []:
        body = _body_of(entry)
        if not isinstance(body, dict):
            continue
        for cms in _iter_lists_by_key(
                body, {"comments", "comment_list", "reply_comments"}):
            n += len(cms)
    return n


# ── 浏览器交互(全部从 probe 移植,headless 下大多照样能跑) ────────────
def _comment_panel_present(pg) -> bool:
    try:
        return bool(pg.evaluate(
            """
            (selectors) => {
              const ok = (el) => {
                if (!el) return false;
                const r = el.getBoundingClientRect();
                const s = getComputedStyle(el);
                const w = Math.min(r.right, innerWidth) - Math.max(r.left, 0);
                const h = Math.min(r.bottom, innerHeight) - Math.max(r.top, 0);
                return r.width > 120 && r.height > 120 && w > 80 && h > 80 &&
                  s.display !== 'none' && s.visibility !== 'hidden' &&
                  Number(s.opacity || '1') > 0.05;
              };
              for (const sel of selectors) {
                for (const el of document.querySelectorAll(sel)) {
                  if (ok(el)) return true;
                }
              }
              return false;
            }
            """, list(_COMMENT_SCROLLERS)))
    except Exception:
        return False


def _wait_for_comment_panel(pg, timeout_ms: int = 3000) -> bool:
    deadline = time.time() + timeout_ms / 1000
    while time.time() < deadline:
        if _comment_panel_present(pg):
            return True
        pg.wait_for_timeout(180)
    return _comment_panel_present(pg)


def _reveal_player_controls(pg) -> None:
    for sel in (".xgplayer", "[class*='xgplayer']", "video"):
        try:
            loc = pg.locator(sel).first
            if loc.count() <= 0:
                continue
            box = loc.bounding_box(timeout=1200)
            if not box:
                continue
            pg.mouse.move(box["x"] + box["width"] * 0.72,
                          box["y"] + box["height"] * 0.82)
            pg.wait_for_timeout(700)
            return
        except Exception:
            continue
    try:
        vp = pg.viewport_size or {"width": 1440, "height": 900}
        pg.mouse.move(vp["width"] * 0.55, vp["height"] * 0.78)
        pg.wait_for_timeout(700)
    except Exception:
        pass


def _maybe_click_fullscreen(pg) -> bool:
    """点全屏按钮 / 按 f,详情面板布局更稳。"""
    _reveal_player_controls(pg)
    for sel in (
        ".xgplayer-icon:has(.xg-get-fullscreen)",
        ".xgplayer-icon .xg-get-fullscreen", ".xg-get-fullscreen",
        ".xgplayer-fullscreen", "[aria-label*='全屏']",
        "[title*='全屏']", "button:has-text('全屏')",
    ):
        try:
            _reveal_player_controls(pg)
            loc = pg.locator(sel).first
            if loc.count() <= 0:
                continue
            try:
                loc.scroll_into_view_if_needed(timeout=1800)
            except Exception:
                pass
            try:
                loc.hover(timeout=1200)
            except Exception:
                pass
            loc.click(timeout=1800)
            pg.wait_for_timeout(1600)
            return True
        except Exception:
            continue
    try:
        _reveal_player_controls(pg)
        pg.keyboard.press("f")
        pg.wait_for_timeout(1200)
        return True
    except Exception:
        return False


def _maybe_click_comments(pg) -> bool:
    """显式点开评论按钮(关键:有些详情页评论默认不展开)。"""
    if _wait_for_comment_panel(pg, 800):
        return True
    for sel in (
        "[data-e2e='feed-comment-icon']", "[data-e2e='feed-comment']",
        "[data-e2e='comment-icon']", "[data-e2e='video-comment']",
        "[aria-label*='评论']", "[title*='评论']",
        "button:has-text('评论')", "[role='button']:has-text('评论')",
        "text=评论",
    ):
        try:
            loc = pg.locator(sel).first
            if loc.count() > 0:
                try:
                    loc.scroll_into_view_if_needed(timeout=1800)
                except Exception:
                    pass
                loc.click(timeout=1200)
                if _wait_for_comment_panel(pg, 2200):
                    return True
        except Exception:
            continue
    # 兜底:文本/SVG path 启发式搜索可点位置
    try:
        points = pg.evaluate(
            """
            () => {
              const hasPanel = () => Boolean(
                [...document.querySelectorAll(
                  "[data-e2e='comment-list'],.comment-mainContent")]
                  .some((el) => {
                    const r = el.getBoundingClientRect();
                    const s = getComputedStyle(el);
                    return r.width > 120 && r.height > 120 &&
                      s.display !== 'none' && s.visibility !== 'hidden';
                  })
              );
              if (hasPanel()) return [{ alreadyOpen: true }];
              const visible = (el) => {
                const r = el.getBoundingClientRect();
                const s = getComputedStyle(el);
                return r.width > 6 && r.height > 6 &&
                  s.visibility !== 'hidden' && s.display !== 'none';
              };
              const points = [];
              const add = (el) => {
                let t = el.closest('button,[role="button"],[tabindex],a') || el;
                if (!visible(t)) return;
                const r = t.getBoundingClientRect();
                points.push({
                  x: Math.round(r.left + r.width / 2),
                  y: Math.round(r.top + r.height / 2),
                });
              };
              for (const el of document.querySelectorAll(
                'button,[role="button"],[tabindex],[aria-label],span')) {
                const tx = [el.innerText || '',
                            el.getAttribute('aria-label') || '',
                            el.getAttribute('title') || ''].join(' ');
                if (!/评论/.test(tx) || /评论区|评论列表|暂无评论/.test(tx))
                  continue;
                add(el);
              }
              return points;
            }
            """) or []
        for p in points:
            if p.get("alreadyOpen"):
                return True
            x, y = p.get("x"), p.get("y")
            if not isinstance(x, int) or not isinstance(y, int):
                continue
            pg.mouse.move(x, y)
            pg.mouse.click(x, y)
            if _wait_for_comment_panel(pg, 2200):
                return True
    except Exception:
        pass
    return False


def _move_into_box(pg, box) -> None:
    vp = pg.viewport_size or {"width": 1440, "height": 900}
    x = min(max(box["x"] + box["width"] / 2, 8), vp["width"] - 8)
    y = min(max(box["y"] + min(box["height"] / 2, 260), 8), vp["height"] - 8)
    pg.mouse.move(x, y)


def _bring_comments_into_view(pg) -> bool:
    for _ in range(4):
        for sel in _COMMENT_SCROLLERS:
            try:
                loc = pg.locator(sel).first
                if loc.count() <= 0:
                    continue
                loc.scroll_into_view_if_needed(timeout=5000)
                pg.wait_for_timeout(1800)
                sc = pg.query_selector(sel)
                if sc:
                    box = sc.bounding_box()
                    if box:
                        _move_into_box(pg, box)
                return True
            except Exception:
                continue
        try:
            pg.evaluate(
                "window.scrollBy(0, Math.round(window.innerHeight*0.68))")
            pg.wait_for_timeout(1700)
        except Exception:
            pass
    return False


def _scroll_comment_panel(pg) -> bool:
    """找到真正可滚的容器(用 getComputedStyle 判 overflow)并滚到底。"""
    try:
        handle = pg.evaluate_handle(
            """
            () => {
              const sels = [
                "[data-e2e='comment-list']", ".comment-mainContent",
                "[data-e2e='detail-comment']", ".comment-list", ".ESlRWJ2j"
              ];
              const ok = (el) => {
                if (!el) return false;
                const s = getComputedStyle(el);
                return el.scrollHeight > el.clientHeight + 20 ||
                  /(auto|scroll)/.test(s.overflowY || '');
              };
              for (const sel of sels) {
                let el = document.querySelector(sel);
                while (el && el !== document.body) {
                  if (ok(el) && el.clientHeight > 120) return el;
                  el = el.parentElement;
                }
              }
              return document.scrollingElement || document.documentElement;
            }
            """
        )
        el = handle.as_element()
    except Exception:
        el = None
    try:
        if el:
            box = el.bounding_box()
            if box:
                _move_into_box(pg, box)
            pg.mouse.wheel(0, 2800)
            pg.evaluate(
                "(e)=>{const s=Math.max(900,Math.floor((e.clientHeight||"
                "innerHeight)*1.35));typeof e.scrollBy==='function'?"
                "e.scrollBy(0,s):e.scrollTop+=s;}", el)
            return True
    except Exception:
        pass
    try:
        pg.mouse.wheel(0, 2800)
        pg.keyboard.press("End")
        return True
    except Exception:
        return False


def _dom_comment_count(pg) -> int:
    try:
        return len(pg.query_selector_all(_DOM_COMMENT_ITEM))
    except Exception:
        return 0


def _comments_end_reached(pg) -> bool:
    try:
        return bool(pg.evaluate(
            "() => document.body && "
            "document.body.innerText.includes('暂时没有更多评论')"))
    except Exception:
        return False


def _goto_detail_url(pg, href: str) -> None:
    try:
        pg.goto(href, wait_until="commit", timeout=30000)
    except Exception:
        if "/video/" not in pg.url:
            raise


def _click_or_goto_detail(pg, link, href: str) -> None:
    """优先 SPA 内点击(让 douyin 自己发签名请求),失败回退直接 goto。"""
    try:
        with pg.expect_navigation(timeout=15000):
            link.click()
        return
    except Exception:
        pass
    try:
        link.click()
        pg.wait_for_timeout(1800)
        if "/video/" in pg.url:
            return
    except Exception:
        pass
    if href:
        _goto_detail_url(pg, href)


def _detail_candidates(pg, raw_api, name, keyword) -> list[dict]:
    """搜索页 DOM 链接 + API 已发现的 aweme_id,取并集做深采候选。"""
    cands, seen = [], set()
    try:
        links = pg.query_selector_all("a[href*='/video/']")
    except Exception:
        links = []
    for link in links:
        try:
            href = link.get_attribute("href") or ""
        except Exception:
            continue
        if href.startswith("//"):
            href = "https:" + href
        elif href.startswith("/"):
            href = "https://www.douyin.com" + href
        if not href or href in seen:
            continue
        seen.add(href)
        cands.append({"href": href, "link": link, "source": "dom"})
    for note in _parse_dy_notes(raw_api, name, keyword):
        href = note.get("url") or ""
        if not href or href in seen:
            continue
        seen.add(href)
        cands.append({"href": href, "link": None, "source": "api"})
    return cands


# ── 主流程:_collect ────────────────────────────────────────────────────
def _collect(keyword: str, pick_tab_cb, deep: bool = True) -> dict:
    """Playwright 持久化上下文采集(已对齐 probe 的成熟链路)。

    返回 {logged_in, raw_api(含 __url/__body), tabs[], notes_dom[],
          api_url_count}。raw_api 直接喂给 _parse_dy_notes/_comments。
    """
    from app.agents.web_agent import _STEALTH_JS, _UA, _CHROME_ARGS
    os.makedirs(DY_PROFILE_DIR, exist_ok=True)
    _unlock()
    raw_api: list = []
    url_count = 0
    labels: list = []
    try:
        from playwright.sync_api import sync_playwright
        with sync_playwright() as p:
            ctx = p.chromium.launch_persistent_context(
                user_data_dir=DY_PROFILE_DIR, headless=True,
                args=_CHROME_ARGS,
                ignore_default_args=["--enable-automation"],
                user_agent=_UA, locale="zh-CN",
                viewport={"width": 1440, "height": 900})
            ctx.add_init_script(_STEALTH_JS)

            def _on(resp):
                nonlocal url_count
                u = resp.url
                if not any(p in u for p in _DIAG):
                    return
                matched = any(p in u for p in _LISTEN)
                body = None
                try:
                    body = resp.json()
                except Exception:
                    body = None
                if matched and isinstance(body, dict):
                    raw_api.append({"__url": u, "__status": resp.status,
                                    "__body": body})
                url_count += 1
            ctx.on("response", _on)

            pg = ctx.pages[0] if ctx.pages else ctx.new_page()
            pg.set_default_timeout(15000)

            # 1) 搜索
            pg.goto(_SEARCH.format(kw=quote(keyword)),
                    wait_until="domcontentloaded", timeout=60000)
            pg.wait_for_timeout(_SEARCH_WAIT_MS)
            html = pg.content()
            cards = pg.query_selector_all("a[href*='/video/']")
            if not cards and ("扫码登录" in html or "手机号登录" in html
                              or "验证" in html or "/passport" in pg.url):
                ctx.close()
                return {"logged_in": False, "raw_api": [], "tabs": [],
                        "api_url_count": url_count}

            # AI 选 tab(best-effort)
            try:
                tabs = pg.query_selector_all(
                    "[data-e2e='search-tab'] span, .tab-item, "
                    "div[role='tab']")
                labels = list({(t.inner_text() or "").strip()
                               for t in tabs
                               if (t.inner_text() or "").strip()})
                chosen = pick_tab_cb(labels) if labels else None
                if chosen and chosen != "综合":
                    for t in tabs:
                        if (t.inner_text() or "").strip() == chosen:
                            t.click()
                            pg.wait_for_timeout(3000)
                            break
            except Exception:
                pass

            # 搜索页继续滚,促 API 出货
            for _ in range(_SEARCH_SCROLLS):
                pg.mouse.wheel(0, 2600)
                pg.wait_for_timeout(int(random.uniform(1000, 1800)))
                if (len(pg.query_selector_all("a[href*='/video/']"))
                        >= _MAX_SEARCH_LINKS):
                    break

            # 候选就绪
            deadline = time.time() + _SEARCH_API_WAIT_MS / 1000
            while time.time() < deadline:
                if _detail_candidates(pg, raw_api, "", keyword):
                    break
                pg.wait_for_timeout(800)

            search_url = pg.url

            # 2) 深采
            if deep:
                for idx in range(_DEEP_NOTES):
                    cands = _detail_candidates(pg, raw_api, "", keyword)
                    if idx >= len(cands):
                        break
                    c = cands[idx]
                    href = c["href"]
                    before = _comment_count_from_raw(raw_api)
                    try:
                        if c.get("link"):
                            _click_or_goto_detail(pg, c["link"], href)
                        else:
                            _goto_detail_url(pg, href)
                        try:
                            pg.wait_for_load_state(
                                "domcontentloaded", timeout=30000)
                        except Exception:
                            if "/video/" not in pg.url:
                                raise
                        pg.wait_for_timeout(_DETAIL_WAIT_MS)
                        if _maybe_click_fullscreen(pg):
                            pg.wait_for_timeout(_FULLSCREEN_WAIT_MS)
                        _maybe_click_comments(pg)
                        if _bring_comments_into_view(pg):
                            pg.wait_for_timeout(_COMMENT_WAIT_MS)
                        # 评论滚动:API 增长 + DOM 项增长 + 末尾文案 三轨判停
                        stall = 0
                        prev_api = _comment_count_from_raw(raw_api)
                        prev_dom = _dom_comment_count(pg)
                        for _ in range(_COMMENT_SCROLLS):
                            _scroll_comment_panel(pg)
                            pg.wait_for_timeout(
                                int(random.uniform(1500, 2400)))
                            if _comments_end_reached(pg):
                                break
                            cur_api = _comment_count_from_raw(raw_api)
                            cur_dom = _dom_comment_count(pg)
                            if cur_api > prev_api or cur_dom > prev_dom:
                                prev_api, prev_dom, stall = (
                                    cur_api, cur_dom, 0)
                            else:
                                stall += 1
                                if stall >= _DEEP_STALL:
                                    break
                        pg.wait_for_timeout(2500)        # 末尾包到齐
                        _ = before                       # 仅占位避免 lint
                    except Exception:
                        try:
                            pg.keyboard.press("Escape")
                        except Exception:
                            pass
                    finally:
                        try:
                            pg.goto(search_url,
                                    wait_until="domcontentloaded",
                                    timeout=45000)
                            pg.wait_for_timeout(
                                int(random.uniform(1500, 2300)))
                        except Exception:
                            pass

            ctx.close()
        return {"logged_in": True, "raw_api": raw_api, "tabs": labels,
                "api_url_count": url_count}
    except Exception as e:  # noqa: BLE001
        return {"error": str(e)[:200]}


# ── LLM 解析与对外接口 ─────────────────────────────────────────────────
async def _build_llm() -> LlmClient | None:
    try:
        cfg = await get_agent_settings()
    except Exception:
        cfg = {}
    g = cfg.get("global", {}) if cfg else {}
    a = (cfg.get("agents", {}) or {}).get("douyin_agent", {}) if cfg else {}
    if a and a.get("enabled") is False:
        return None
    key = a.get("api_key") or g.get("api_key") or settings.llm_api_key
    base = a.get("base_url") or g.get("base_url") or settings.llm_api_base
    if not key or not base:
        return None
    model = (a.get("model") or g.get("model")
             or settings.llm_model or "deepseek-chat")
    return LlmClient(api_base=base, api_key=key, model=model,
                     timeout=int(g.get("timeout") or 120))


async def douyin_enrich(entity: dict) -> dict:
    """对某 Place 联抖音采视频+评论 → 证据层 + 体验标签。

    返回 {ok, found, need_login, tags, sentiment, evidence_saved,
          note_count, comment_count, summary}。
    """
    llm = await _build_llm()
    if llm is None:
        return {"ok": False, "summary": "douyin_agent 未配置或停用"}
    name = entity.get("name", "")
    biz = entity.get("biz_need") or f"补全「{name}」真实口碑/体验/热度"
    keyword = f"贵阳 {name}".strip()
    pnk = entity.get("eid") or entity.get("natural_key")

    def _pick(labels):
        try:
            r = llm.chat_json(_TAB_SYS, json.dumps(
                {"业务需求": biz, "可选tab": labels}, ensure_ascii=False))
            return (r or {}).get("tab")
        except Exception:
            return None

    res = await asyncio.to_thread(_collect, keyword, _pick, True)
    if res.get("error"):
        return {"ok": True, "found": False,
                "summary": f"采集异常:{res['error']}"}
    if res.get("logged_in") is False:
        return {"ok": True, "found": False, "need_login": True,
                "summary": "抖音未登录,需一次性人工登录(已升级)"}

    raw = res.get("raw_api") or []
    notes = _parse_dy_notes(raw, name, keyword)
    comments = _parse_dy_comments(raw, name, keyword)
    records = notes + comments
    if not records:
        return {"ok": True, "found": False,
                "summary": f"「{name}」抖音无相关 UGC,跳过"}
    for r in records:
        r["place_natural_key"] = pnk
    try:
        saved = await sa_save_evidence(records)
    except Exception:
        saved = 0
    n_note = len(notes)
    n_cmt = len(comments)

    corpus = json.dumps(
        {"地点": name,
         "视频": [x["title"] for x in notes if x["title"]][:40],
         "评论": [x["content"] for x in comments if x["content"]][:80]},
        ensure_ascii=False)
    try:
        t = await asyncio.to_thread(llm.chat_json, _TAG_SYS, corpus)
    except Exception as e:  # noqa: BLE001
        return {"ok": True, "found": True, "evidence_saved": saved,
                "tags": [], "note_count": n_note, "comment_count": n_cmt,
                "summary": f"证据入库{saved},标签失败:{str(e)[:50]}"}
    tags = [str(x).strip()[:24] for x in (t.get("tags") or [])
            if str(x).strip()]
    return {
        "ok": True, "found": True, "tags": tags[:12],
        "sentiment": t.get("sentiment", ""), "evidence_saved": saved,
        "note_count": n_note, "comment_count": n_cmt,
        "summary": (f"抖音 视频{n_note}+评论{n_cmt} 入证据层(存{saved}) → "
                    f"{len(tags)} 体验标签·口碑{t.get('sentiment','?')}"),
    }