Initial travel knowledge graph release

2026-06-09 09:56:26 +08:00
commit 5f061295d8
402 changed files with 103877 additions and 0 deletions
--- a/scripts/organize_existing_routes_to_md.py
+++ b/scripts/organize_existing_routes_to_md.py
@@ -0,0 +1,544 @@
+from __future__ import annotations
+
+import hashlib
+import json
+import re
+import subprocess
+from collections import Counter
+from datetime import datetime
+from pathlib import Path
+from typing import Any
+
+
+# Input/output locations used by the whole script.
+# NOTE(review): these are absolute paths under one user's macOS home directory;
+# the script cannot run on another machine without editing them.
+# SOURCE_DIR is scanned by source_files() for .doc/.docx itinerary documents.
+SOURCE_DIR = Path("/Users/xuexue/Downloads/旅行社业务/2026年新行程打包")
+# OUT_DIR receives the generated Markdown files, the index and the run summary.
+OUT_DIR = Path("/Users/xuexue/Downloads/旅行社业务/2026年新行程打包_md整理")
+# GRAPH_OUT_DIR receives a copy of the OUT_DIR artefacts (written by main()).
+GRAPH_OUT_DIR = Path("/Users/xuexue/Downloads/图谱数据/旅行社项目入库/已有路线产品Markdown")
+
+
+# Canonical attraction name -> list of spellings that count as a mention.
+# extract_attractions() tests each alias with plain substring containment, so a
+# short alias (for example "西江" or "茅台") also matches any longer word that
+# contains it. Dictionary order determines the order of the extracted names.
+ATTRACTION_ALIASES = {
+    "黄果树": ["黄果树", "黄果树瀑布", "黄果树大瀑布"],
+    "天星桥": ["天星桥", "天星桥景区"],
+    "陡坡塘瀑布": ["陡坡塘", "陡坡塘瀑布"],
+    "荔波小七孔": ["小七孔", "荔波小七孔", "小七孔景区"],
+    "西江千户苗寨": ["西江", "西江苗寨", "西江千户苗寨"],
+    "镇远古城": ["镇远", "镇远古镇", "镇远古城"],
+    "梵净山": ["梵净山"],
+    "青岩古镇": ["青岩", "青岩古镇"],
+    "百里杜鹃": ["百里杜鹃"],
+    "平坝樱花": ["平坝樱花", "平坝农场"],
+    "织金洞": ["织金洞"],
+    "中国天眼": ["天眼", "中国天眼", "FAST"],
+    "茅台镇": ["茅台", "茅台镇"],
+    "遵义会议会址": ["遵义会址", "遵义会议会址"],
+    "兴义万峰林": ["万峰林", "兴义万峰林"],
+    "万峰湖": ["万峰湖"],
+    "马岭河峡谷": ["马岭河", "马岭河峡谷"],
+    "花江大桥": ["花江大桥"],
+    "龙宫": ["龙宫"],
+    "天河潭": ["天河潭"],
+    "甲秀楼": ["甲秀楼"],
+    "黔灵山公园": ["黔灵公园", "黔灵山"],
+    "乌江寨": ["乌江寨"],
+}
+
+
+def clean(value: Any) -> str:
+    if value is None:
+        return ""
+    text = str(value).replace("\x00", "").replace("\u200b", "").replace("\u200f", "")
+    text = re.sub(r"[ \t]+", " ", text)
+    text = re.sub(r"\n{4,}", "\n\n\n", text)
+    return text.strip()
+
+
+def compact(value: Any) -> str:
+    return re.sub(r"\s+", " ", clean(value)).strip()
+
+
+def chinese_count(text: str) -> int:
+    return len(re.findall(r"[\u4e00-\u9fff]", text))
+
+
+def read_legacy_doc_text(path: Path) -> str:
+    """Fallback for old WPS/Word .doc files that macOS textutil cannot decode."""
+    data = path.read_bytes()
+    decoded = data.decode("utf-16le", errors="ignore").replace("\x00", "")
+    anchors = [
+        path.stem,
+        "❀",
+        "贵客黔游",
+        "推荐理由",
+        "推 荐 理 由",
+        "简易行程",
+        "行程安排",
+    ]
+    positions = [decoded.find(anchor) for anchor in anchors if decoded.find(anchor) >= 0]
+    if positions:
+        decoded = decoded[min(positions):]
+    decoded = decoded[:50000]
+    decoded = decoded.replace("\r", "\n")
+    decoded = re.sub(r"[\x01-\x06\x08-\x09\x0b-\x1f]", "", decoded)
+    decoded = re.sub(r"[䀀-俿]{8,}", "", decoded)
+    decoded = re.sub(r"[]{2,}", "", decoded)
+    decoded = re.sub(r"\n{3,}", "\n\n", decoded)
+    return clean(decoded)
+
+
+def read_office_text(path: Path) -> str:
+    proc = subprocess.run(
+        ["textutil", "-convert", "txt", "-stdout", str(path)],
+        check=False,
+        capture_output=True,
+        text=True,
+    )
+    if proc.returncode == 0 and chinese_count(proc.stdout) >= 30:
+        return proc.stdout.replace("\x00", "")
+    fallback = read_legacy_doc_text(path)
+    if chinese_count(fallback) >= 30:
+        return fallback
+    if proc.returncode != 0:
+        return f"[textutil读取失败] {proc.stderr}"
+    return proc.stdout.replace("\x00", "")
+
+
+def safe_filename(text: str, index: int) -> str:
+    base = re.sub(r"[\\/:*?\"<>|]+", "_", compact(text))
+    base = re.sub(r"\s+", "", base)
+    base = base.strip("._ ")
+    digest = hashlib.md5(text.encode("utf-8")).hexdigest()[:8]
+    return f"{index:02d}_{base[:70]}_{digest}.md"
+
+
+def duration_from_text(text: str) -> int | None:
+    for pattern in [r"(\d+)\s*日游", r"(\d+)\s*天", r"(\d+)\s*日"]:
+        m = re.search(pattern, text)
+        if m:
+            return int(m.group(1))
+    cn = {"一": 1, "二": 2, "两": 2, "三": 3, "四": 4, "五": 5, "六": 6, "七": 7, "八": 8, "九": 9, "十": 10}
+    m = re.search(r"([一二两三四五六七八九十])日游", text)
+    if m:
+        return cn.get(m.group(1))
+    return None
+
+
+def product_family(name: str, text: str) -> str:
+    joined = f"{name} {text[:1500]}"
+    if "高端" in joined or "5钻" in joined or "五钻" in joined:
+        return "高端纯玩"
+    if "轻奢" in joined or "2+1" in joined or "保姆车" in joined or "头等舱" in joined:
+        return "轻奢纯玩"
+    if "多彩" in joined:
+        return "多彩贵州"
+    if "经典" in joined:
+        return "经典纯玩"
+    if "1+1" in joined or "游黔途" in joined or "游黔程" in joined:
+        return "游黔途/游黔程"
+    if "独立" in joined or "20-25" in joined:
+        return "独立成团"
+    return "常规纯玩"
+
+
+def hotel_grade_from_text(text: str) -> str:
+    if any(x in text for x in ("5钻", "五钻", "五星", "超五星")):
+        return "5钻/五星"
+    if any(x in text for x in ("4钻", "四钻", "四星")):
+        return "4钻/四星"
+    if "商务" in text:
+        return "商务"
+    if "客栈" in text:
+        return "客栈"
+    return ""
+
+
+def vehicle_from_text(text: str) -> str:
+    if "1+1" in text:
+        return "1+1双排座"
+    if "2+1" in text or "保姆车" in text or "头等舱" in text:
+        return "2+1保姆车/头等舱"
+    if "2+2" in text:
+        return "2+2商务车"
+    if "32-38" in text or "独立成团" in text:
+        return "32-38座2+1大巴"
+    if "旅游大巴" in text or "大巴" in text:
+        return "旅游大巴"
+    if "9座" in text:
+        return "9座商务车"
+    if "7座" in text:
+        return "7座商务车"
+    if "5座" in text:
+        return "5座车"
+    return ""
+
+
+def extract_between(text: str, starts: list[str], ends: list[str], limit: int | None = None) -> str:
+    start_pos = -1
+    for token in starts:
+        pos = text.find(token)
+        if pos >= 0 and (start_pos < 0 or pos < start_pos):
+            start_pos = pos
+    if start_pos < 0:
+        return ""
+    end_pos = len(text)
+    for token in ends:
+        pos = text.find(token, start_pos + 2)
+        if pos >= 0:
+            end_pos = min(end_pos, pos)
+    block = clean(text[start_pos:end_pos])
+    if limit:
+        return block[:limit]
+    return block
+
+
+def parse_simple_itinerary(text: str) -> list[dict[str, str]]:
+    block = extract_between(text, ["简易行程"], ["详细行程", "接待标准", "费用包含", "团费包含"], 2600)
+    layout = "simple"
+    if not block:
+        block = extract_between(text, ["行程安排"], ["接待标准", "费用包含", "团费包含", "特别提醒"], 4200)
+        layout = "schedule"
+    if not block:
+        return []
+    cells = [compact(x) for x in re.split(r"[\x07\t]+", block) if compact(x)]
+    rows: list[dict[str, str]] = []
+    for idx, cell in enumerate(cells):
+        if not re.fullmatch(r"D\s*\d+", cell, flags=re.I):
+            continue
+        if layout == "schedule" and idx + 4 < len(cells):
+            content = cells[idx + 2]
+            attraction_route = "→".join(extract_attractions(content))
+            rows.append({
+                "day": cell.upper().replace(" ", ""),
+                "day_index": re.search(r"\d+", cell).group(),
+                "route": attraction_route or content[:80],
+                "meals": cells[idx + 4],
+                "accommodation": cells[idx + 3],
+            })
+        else:
+            rows.append({
+                "day": cell.upper().replace(" ", ""),
+                "day_index": re.search(r"\d+", cell).group(),
+                "route": cells[idx + 1] if idx + 1 < len(cells) else "",
+                "meals": cells[idx + 2] if idx + 2 < len(cells) else "",
+                "accommodation": cells[idx + 3] if idx + 3 < len(cells) else "",
+            })
+    return rows
+
+
+def extract_day_segments(text: str) -> list[dict[str, str]]:
+    source = extract_between(
+        text,
+        ["详细行程", "行程安排"],
+        ["· 接待标准", "接待标准", "· 费用包含", "费用包含", "团费包含", "参团须知", "· 温馨提示"],
+    )
+    if len(source) < 50:
+        source = text
+    matches = list(re.finditer(r"(?m)(D\s*\d+|第[一二三四五六七八九十]+天)[：:、\s]*", source))
+    if not matches:
+        return []
+    cn_map = {"一": 1, "二": 2, "两": 2, "三": 3, "四": 4, "五": 5, "六": 6, "七": 7, "八": 8, "九": 9, "十": 10}
+    rows: list[dict[str, str]] = []
+    for idx, match in enumerate(matches):
+        if idx + 1 < len(matches):
+            end = matches[idx + 1].start()
+        else:
+            end = len(source)
+            for token in ["· 接待标准", "接待标准", "· 费用包含", "费用包含", "团费包含", "参团须知", "温馨提示"]:
+                pos = source.find(token, match.start() + 2)
+                if pos >= 0:
+                    end = min(end, pos)
+        token = match.group(1)
+        if token.startswith("D"):
+            day_index = int(re.search(r"\d+", token).group())
+        else:
+            day_index = cn_map.get(re.search(r"第(.+?)天", token).group(1), idx + 1)
+        body = clean(source[match.start():end])
+        lines = [compact(x) for x in body.splitlines() if compact(x)]
+        title = next((line for line in lines[:4] if not line.startswith("第") and not re.fullmatch(r"D\s*\d+", line, flags=re.I)), f"D{day_index}")
+        rows.append({"day": f"D{day_index}", "day_index": str(day_index), "title": title, "body": body})
+    return rows[:12]
+
+
+def extract_attractions(text: str) -> list[str]:
+    found: list[str] = []
+    for name, aliases in ATTRACTION_ALIASES.items():
+        if any(alias in text for alias in aliases) and name not in found:
+            found.append(name)
+    return found
+
+
+def sentence_snippets(text: str, keywords: list[str], limit: int = 20) -> list[str]:
+    snippets: list[str] = []
+    for raw in re.split(r"[。！？；;\n]+", text):
+        line = compact(raw)
+        if len(line) < 4:
+            continue
+        if any(keyword in line for keyword in keywords):
+            snippets.append(line[:260])
+        if len(snippets) >= limit:
+            break
+    seen: set[str] = set()
+    out: list[str] = []
+    for item in snippets:
+        if item not in seen:
+            seen.add(item)
+            out.append(item)
+    return out
+
+
+def md_table(headers: list[str], rows: list[list[Any]]) -> str:
+    def cell(value: Any) -> str:
+        return compact(value).replace("|", "\\|")
+    lines = ["| " + " | ".join(headers) + " |", "| " + " | ".join(["---"] * len(headers)) + " |"]
+    for row in rows:
+        lines.append("| " + " | ".join(cell(v) for v in row) + " |")
+    return "\n".join(lines)
+
+
+def code_block(text: str) -> str:
+    return "```text\n" + clean(text).replace("```", "'''") + "\n```"
+
+
+def source_files() -> list[Path]:
+    return sorted(
+        p for p in SOURCE_DIR.iterdir()
+        if p.is_file()
+        and p.suffix.lower() in {".doc", ".docx"}
+        and not p.name.startswith((".", "~$", ".~"))
+    )
+
+
+def build_markdown(path: Path, index: int) -> tuple[str, dict[str, Any]]:
+    """Convert one itinerary document into a Markdown page plus index metadata.
+
+    Args:
+        path: The .doc/.docx source document.
+        index: Position of the document in the run; not used in this function.
+
+    Returns:
+        ``(markdown, meta)``. ``markdown`` is the full page: a JSON front matter
+        block between ``---`` lines followed by six numbered sections. ``meta``
+        is the front matter plus ``markdown_filename`` (left empty here),
+        ``simple_day_count``, ``detailed_day_count`` and ``text_chars``.
+    """
+    text = read_office_text(path)
+    lines = [compact(x) for x in text.splitlines() if compact(x)]
+    # Product name: first non-empty line, unless it is too short or is a Word
+    # field-code remnant, in which case the file stem is used.
+    name = lines[0] if lines else path.stem
+    if len(name) < 3 or "INCLUDEPICTURE" in name:
+        name = path.stem
+    # The heuristics below only look at the file stem and the head of the text.
+    duration = duration_from_text(f"{path.stem} {name} {text[:300]}")
+    family = product_family(name, text)
+    vehicle = vehicle_from_text(f"{path.stem} {text[:2200]}")
+    hotel_grade = hotel_grade_from_text(f"{path.stem} {text[:2600]}")
+    simple_rows = parse_simple_itinerary(text)
+    day_segments = extract_day_segments(text)
+    # Attractions come from the route overview (or the day titles when there is
+    # no overview); the day bodies are only a last resort.
+    route_text = " ".join(row.get("route", "") for row in simple_rows) or " ".join(row.get("title", "") for row in day_segments)
+    attractions = extract_attractions(f"{name} {path.stem} {route_text}")
+    if not attractions:
+        attractions = extract_attractions(" ".join(day.get("body", "")[:1200] for day in day_segments))
+    # NOTE: selling_points is computed but not referenced again in this function.
+    selling_points = sentence_snippets(text, ["核心卖点", "甄选", "赠送", "超值", "纯玩", "0购物", "车型"], limit=12)
+    fees = sentence_snippets(text, ["费用", "不含", "自理", "电瓶车", "观光车", "环保车", "保险", "扶梯", "索道", "餐标", "单房差", "儿童价"], limit=30)
+    rules = sentence_snippets(text, ["老人", "儿童", "学生", "军人", "退团", "退费", "不可抗力", "预约", "投诉", "意见单", "满房", "同级", "孕妇", "不接待"], limit=30)
+    hotel_block = extract_between(text, ["酒店"], ["导游服务", "购物", "意见单填写", "温馨提示"], 3200)
+    gift_block = extract_between(text, ["赠送服务", "赠送："], ["简易行程", "详细行程", "接待标准"], 1800)
+    reception_block = extract_between(text, ["接待标准"], ["温馨提示", "· 温馨提示"], 4200)
+
+    frontmatter = {
+        "doc_type": "existing_route_product_markdown",
+        "schema_target": "travel_agency_existing_product",
+        "route_immutable": True,
+        "source_file": str(path),
+        "source_filename": path.name,
+        "product_name": name,
+        "duration_days": duration,
+        "product_family": family,
+        "default_vehicle_type": vehicle,
+        "default_hotel_grade": hotel_grade,
+        "core_attractions": attractions,
+    }
+    # Route skeleton rows: prefer the overview table, else fall back to day titles.
+    rows = [[r["day"], r.get("route", ""), r.get("meals", ""), r.get("accommodation", "")] for r in simple_rows]
+    if not rows:
+        rows = [[r["day"], r.get("title", ""), "", ""] for r in day_segments]
+
+    parts: list[str] = []
+    # Front matter is serialised as JSON between the "---" fences.
+    parts.append("---")
+    parts.append(json.dumps(frontmatter, ensure_ascii=False, indent=2))
+    parts.append("---")
+    parts.append(f"# {name}")
+    parts.append("")
+    # Section 1: product identification table.
+    parts.append("## 1. 产品识别")
+    parts.append(md_table(
+        ["字段", "值"],
+        [
+            ["源文件", str(path)],
+            ["产品名称", name],
+            ["天数", duration or ""],
+            ["产品系列/类型", family],
+            ["默认车型", vehicle],
+            ["默认酒店等级", hotel_grade],
+            ["路线是否固定", "是；客户微调只能改资源槽位，不能改天数、景点顺序、城市移动路径"],
+            ["核心景点候选", "、".join(attractions)],
+        ],
+    ))
+    parts.append("")
+    # Section 2: fixed route skeleton.
+    parts.append("## 2. 固定路线骨架")
+    parts.append("")
+    parts.append("抽取目标：`TourProduct -> ProductDay -> RouteStop / RouteSegment`。")
+    parts.append("")
+    parts.append(md_table(["天数", "路线/标题", "用餐", "住宿"], rows))
+    parts.append("")
+    # Section 3: one subsection per detailed day, joined to the overview row
+    # that has the same day_index (both sides hold it as a string).
+    parts.append("## 3. 每日详细行程")
+    parts.append("")
+    parts.append("抽取目标：每天生成 `ProductDay`，从正文识别真实 `RouteStop` 和 `RouteSegment`；不要把费用说明里的景点当作真实停靠点。")
+    for day in day_segments:
+        idx = day["day_index"]
+        simple = next((row for row in simple_rows if row["day_index"] == idx), {})
+        parts.append("")
+        parts.append(f"### {day['day']} {simple.get('route') or day.get('title') or ''}")
+        parts.append("")
+        parts.append(md_table(
+            ["字段", "值"],
+            [
+                ["day_index", idx],
+                ["route_path", simple.get("route", "")],
+                ["meal_text", simple.get("meals", "")],
+                ["accommodation_text", simple.get("accommodation", "")],
+            ],
+        ))
+        parts.append("")
+        parts.append(code_block(day["body"]))
+    parts.append("")
+    # Section 4: candidate resource slots (hotel, meals, transport, tickets, gifts).
+    parts.append("## 4. 可配置资源槽位候选")
+    parts.append("")
+    parts.append("抽取目标：`ResourceSlot -> ResourceOptionGroup -> Hotel/Restaurant/Vehicle/TicketFee/GiftService`。")
+    parts.append("")
+    parts.append("### 4.1 住宿槽位候选")
+    parts.append("")
+    if hotel_block:
+        parts.append(code_block(hotel_block))
+    else:
+        parts.append("- 原文未明确独立酒店参考段；可从每日住宿列生成住宿槽位。")
+    parts.append("")
+    parts.append("### 4.2 餐饮槽位候选")
+    parts.append("")
+    meal_lines = sentence_snippets(text, ["用餐", "餐标", "早餐", "中餐", "晚餐", "长桌宴", "酸汤鱼"], limit=20)
+    parts.extend(f"- {line}" for line in meal_lines or ["原文未明确独立餐饮段；可从每日用餐列生成餐饮槽位。"])
+    parts.append("")
+    parts.append("### 4.3 车辆/交通槽位候选")
+    parts.append("")
+    traffic_lines = sentence_snippets(text, ["交通", "车型", "用车", "车辆", "保姆车", "商务车", "大巴", "接人", "送站", "接站"], limit=20)
+    parts.extend(f"- {line}" for line in traffic_lines or ["原文未明确独立车辆段；可从产品名和行程交通描述识别默认车型。"])
+    parts.append("")
+    parts.append("### 4.4 门票/小交通/保险槽位候选")
+    parts.append("")
+    ticket_lines = sentence_snippets(text, ["门票", "观光车", "环保车", "电瓶车", "保险", "扶梯", "索道", "游船", "小交通", "景交"], limit=30)
+    parts.extend(f"- {line}" for line in ticket_lines or ["原文未明确门票小交通段。"])
+    parts.append("")
+    parts.append("### 4.5 赠送服务槽位候选")
+    parts.append("")
+    # Prefer the dedicated gift section; otherwise fall back to keyword sentences.
+    if gift_block:
+        parts.append(code_block(gift_block))
+    else:
+        gift_lines = sentence_snippets(text, ["赠送", "旅拍", "矿泉水", "长桌宴", "高山流水", "打糍粑"], limit=12)
+        parts.extend(f"- {line}" for line in gift_lines or ["原文未明确赠送服务。"])
+    parts.append("")
+    # Section 5: fee and business-rule candidates.
+    parts.append("## 5. 费用与规则候选")
+    parts.append("")
+    parts.append("抽取目标：`ProductPricePackage`、`TicketFee`、`FeeItem`、`BusinessRule`。")
+    parts.append("")
+    parts.append("### 5.1 费用候选")
+    parts.extend(f"- {line}" for line in fees or ["未识别到明显费用候选。"])
+    parts.append("")
+    parts.append("### 5.2 业务规则候选")
+    parts.extend(f"- {line}" for line in rules or ["未识别到明显规则候选。"])
+    if reception_block:
+        parts.append("")
+        parts.append("### 5.3 接待标准/服务规则原文")
+        parts.append(code_block(reception_block))
+    parts.append("")
+    # Section 6: the complete extracted text, kept as evidence.
+    parts.append("## 6. 原文保留")
+    parts.append("")
+    parts.append("后续抽取如果结构化段落不够，可回到本段原文补证据。")
+    parts.append("")
+    parts.append(code_block(text))
+    markdown = "\n".join(parts).strip() + "\n"
+    meta = {
+        **frontmatter,
+        # Filled in by the caller once the output file name is known.
+        "markdown_filename": "",
+        "simple_day_count": len(simple_rows),
+        "detailed_day_count": len(day_segments),
+        "text_chars": len(clean(text)),
+    }
+    return markdown, meta
+
+
+def write_index(items: list[dict[str, Any]]) -> None:
+    summary_rows = [
+        [
+            item["product_name"],
+            item.get("duration_days") or "",
+            item.get("product_family") or "",
+            item.get("default_vehicle_type") or "",
+            item.get("default_hotel_grade") or "",
+            "、".join(item.get("core_attractions") or []),
+            item["markdown_filename"],
+        ]
+        for item in items
+    ]
+    counts = Counter(item.get("duration_days") for item in items)
+    family_counts = Counter(item.get("product_family") for item in items)
+    lines = [
+        "# 已有路线产品 Markdown 整理索引",
+        "",
+        f"生成时间：{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
+        "",
+        f"- 源目录：`{SOURCE_DIR}`",
+        f"- 输出目录：`{OUT_DIR}`",
+        f"- 产品文档数：{len(items)}",
+        "",
+        "## 按天数统计",
+        "",
+        *[f"- {day or '未知'}天：{count} 个" for day, count in sorted(counts.items(), key=lambda x: (x[0] is None, x[0] or 0))],
+        "",
+        "## 按产品类型统计",
+        "",
+        *[f"- {name}：{count} 个" for name, count in family_counts.most_common()],
+        "",
+        "## 产品索引",
+        "",
+        md_table(["产品名称", "天数", "产品类型", "默认车型", "默认酒店", "核心景点候选", "Markdown文件"], summary_rows),
+        "",
+        "## 后续抽取建议",
+        "",
+        "- 先抽取 `TourProduct`、`ProductDay`、`RouteStop`、`RouteSegment`，确保路线骨架稳定。",
+        "- 再抽取 `ResourceSlot`，把住宿、餐饮、车辆、接送、门票小交通、赠送服务作为可配置槽位。",
+        "- 产品文档里“费用说明/自理项目/不含”出现的景点名称，不要抽成真实路线停靠点。",
+        "- 酒店参考段优先抽成 `ResourceOptionGroup`，不要强行抽成唯一入住酒店。",
+        "- 规则要挂到影响对象：退改挂产品/价格包，优惠挂门票费用，少走路/预约风险挂景点或停靠点，可替换规则挂资源槽位。",
+    ]
+    (OUT_DIR / "README_已有路线产品md整理.md").write_text("\n".join(lines), encoding="utf-8")
+    (OUT_DIR / "产品索引.json").write_text(json.dumps(items, ensure_ascii=False, indent=2), encoding="utf-8")
+
+
+def main() -> dict[str, Any]:
+    OUT_DIR.mkdir(parents=True, exist_ok=True)
+    GRAPH_OUT_DIR.mkdir(parents=True, exist_ok=True)
+    product_dir = OUT_DIR / "products"
+    product_dir.mkdir(parents=True, exist_ok=True)
+    items: list[dict[str, Any]] = []
+    for index, path in enumerate(source_files(), start=1):
+        markdown, meta = build_markdown(path, index)
+        filename = safe_filename(meta["product_name"] or path.stem, index)
+        meta["markdown_filename"] = f"products/{filename}"
+        (product_dir / filename).write_text(markdown, encoding="utf-8")
+        items.append(meta)
+    write_index(items)
+
+    # Keep a synchronized copy under 图谱数据 for extraction experiments.
+    for src in [OUT_DIR / "README_已有路线产品md整理.md", OUT_DIR / "产品索引.json"]:
+        (GRAPH_OUT_DIR / src.name).write_text(src.read_text(encoding="utf-8"), encoding="utf-8")
+    graph_products = GRAPH_OUT_DIR / "products"
+    graph_products.mkdir(parents=True, exist_ok=True)
+    for md in product_dir.glob("*.md"):
+        (graph_products / md.name).write_text(md.read_text(encoding="utf-8"), encoding="utf-8")
+
+    summary = {
+        "source_dir": str(SOURCE_DIR),
+        "output_dir": str(OUT_DIR),
+        "graph_output_dir": str(GRAPH_OUT_DIR),
+        "product_markdown_count": len(items),
+        "generated_at": datetime.now().isoformat(timespec="seconds"),
+    }
+    (OUT_DIR / "生成摘要.json").write_text(json.dumps(summary, ensure_ascii=False, indent=2), encoding="utf-8")
+    (GRAPH_OUT_DIR / "生成摘要.json").write_text(json.dumps(summary, ensure_ascii=False, indent=2), encoding="utf-8")
+    return summary
+
+
+# Script entry point: run the conversion and print the run summary as JSON.
+if __name__ == "__main__":
+    print(json.dumps(main(), ensure_ascii=False, indent=2))