330 lines
17 KiB
Python
330 lines
17 KiB
Python
from __future__ import annotations
|
||
|
||
import json
|
||
import shutil
|
||
from copy import deepcopy
|
||
from pathlib import Path
|
||
|
||
import psycopg
|
||
from psycopg.rows import dict_row
|
||
from psycopg.types.json import Jsonb
|
||
|
||
from app.config import settings
|
||
from common_paths import PROJECT_ROOT, TRAVEL_KG_EXPORT_ROOT
|
||
|
||
PROJECT_ID = "travel_agency_2_0_test"
|
||
TENANT_ID = "travel_agency"
|
||
GRAPH_NAME = "travel_agency_2_0_test"
|
||
NAMESPACE = "travel_agency_2_0"
|
||
SCHEMA_DIR = PROJECT_ROOT / "schema搭建/travel_agency_2_0_test"
|
||
DOWNLOAD_DIR = TRAVEL_KG_EXPORT_ROOT / "travel_agency_2_0_test_旅行社2.0测试"
|
||
CURRENT_JSON = SCHEMA_DIR / "travel_agency_2_0_schema.current.json"
|
||
|
||
|
||
def add_fields(entity: dict, fields: list[str], after: str | None = None) -> None:
|
||
current = list(entity.get("fields") or [])
|
||
insert_at = len(current)
|
||
if after and after in current:
|
||
insert_at = current.index(after) + 1
|
||
for field in fields:
|
||
if field not in current:
|
||
current.insert(insert_at, field)
|
||
insert_at += 1
|
||
entity["fields"] = current
|
||
|
||
|
||
def schema_to_dsl(payload: dict) -> str:
|
||
lines: list[str] = [
|
||
"```text",
|
||
f"namespace {payload.get('namespace', NAMESPACE)}",
|
||
f"version {payload.get('version', '')}",
|
||
"",
|
||
"// 2.3:采纳最终意见中的 POI 拆分、scenic_id 主景区挂载、Activity 纠偏;保留关系属性绑定,不恢复 Binding 实体。",
|
||
]
|
||
for name, meta in payload.get("entity_types", {}).items():
|
||
lines.append("")
|
||
lines.append(f"entity {name} // {meta.get('definition', '')}")
|
||
if meta.get("primary_key"):
|
||
lines.append(f" primary_key {meta['primary_key']}")
|
||
for field in meta.get("fields", []):
|
||
lines.append(f" property {field}: Text")
|
||
for name, meta in payload.get("relation_types", {}).items():
|
||
lines.append("")
|
||
lines.append(
|
||
f"relation {name}: {meta.get('from', '')} -> {meta.get('to', '')} // {meta.get('definition', '')}"
|
||
)
|
||
for prop in meta.get("properties", []):
|
||
lines.append(f" property {prop}: Text")
|
||
lines.extend(["", "// design_principles"])
|
||
for item in payload.get("design_principles", []):
|
||
lines.append(f"- {item}")
|
||
lines.extend(["", "// revision_notes"])
|
||
for item in payload.get("revision_notes", []):
|
||
lines.append(f"- {item}")
|
||
lines.extend(["", "// query_recipes"])
|
||
for name, query in payload.get("query_recipes", {}).items():
|
||
lines.append(f"{name}:")
|
||
lines.append(query)
|
||
lines.append("```")
|
||
return "\n".join(lines) + "\n"
|
||
|
||
|
||
def build_schema() -> dict:
|
||
schema = deepcopy(json.loads(CURRENT_JSON.read_text(encoding="utf-8")))
|
||
schema["version"] = "2.3"
|
||
schema["display_name"] = "旅行社2.3固定线路 POI 与主景区费用图谱 Schema"
|
||
schema["purpose"] = (
|
||
"面向旅行社固定线路与小包团销售:路线固定、站点有序;Hotel/Restaurant 为独立 POI;"
|
||
"TravelItem 仅承载非 POI 服务/费用;景区费用项统一用 scenic_id 挂到主景区;"
|
||
"默认/必付/可选资源状态继续放在 RouteStop/Product 到资源的关系属性中,不恢复 ItineraryResourceBinding 实体;"
|
||
"行政区继续属性化,同时预留 region_id,避免行政区层级把图谱撑乱。"
|
||
)
|
||
|
||
enums = schema.setdefault("enums", {})
|
||
enums["TravelItem.type"] = [
|
||
"Ticket",
|
||
"ScenicTransport",
|
||
"Insurance",
|
||
"Vehicle",
|
||
"GiftService",
|
||
"SmallGroupPackage",
|
||
"Shopping",
|
||
"Service",
|
||
"VideoGuide",
|
||
"Guide",
|
||
"Pickup",
|
||
"Other",
|
||
]
|
||
enums["ResourceRefType"] = ["Hotel", "Restaurant", "TravelItem"]
|
||
enums["ResourceStatus"] = ["INCLUDED", "OPTIONAL", "MANDATORY"]
|
||
enums["RouteStop.business_filter"] = ["ROUTE_NODE", "RESOURCE_ONLY", "TEXT_NOTE"]
|
||
|
||
entity_types = schema.setdefault("entity_types", {})
|
||
add_fields(entity_types["ScenicAttraction"], ["short_name", "region_id"], after="name")
|
||
add_fields(entity_types["SubAttraction"], ["parent_attraction_id"], after="parent_attraction_name")
|
||
add_fields(
|
||
entity_types["RouteStop"],
|
||
[
|
||
"scenic_id",
|
||
"main_attraction",
|
||
"main_attraction_id",
|
||
"sub_attractions",
|
||
"region_text",
|
||
"business_filter",
|
||
],
|
||
after="stop_type",
|
||
)
|
||
add_fields(entity_types["Hotel"], ["region_id", "geo"], after="region_name")
|
||
add_fields(entity_types["Restaurant"], ["region_id", "geo"], after="region_name")
|
||
add_fields(
|
||
entity_types["TravelItem"],
|
||
["scenic_id", "raw_evidence", "original_type", "normalized_type_reason"],
|
||
after="scenic_name",
|
||
)
|
||
entity_types["TravelItem"]["definition"] = (
|
||
"非 POI 的统一服务/费用资源。门票、小交通、保险、景区二消、车辆、赠送服务、小包团套餐等进入该类型;"
|
||
"景区相关费用必须优先填写 scenic_id 并挂主景区;Hotel/Restaurant 不放入 TravelItem。"
|
||
)
|
||
entity_types["Hotel"]["definition"] = (
|
||
"独立酒店 POI。酒店有地址、经纬度、图片、星级、房型和淡旺季/周末价格,可跨多条线路复用。"
|
||
)
|
||
entity_types["Restaurant"]["definition"] = (
|
||
"独立餐厅 POI。餐厅有地址、菜系、餐标、容量和图片,可跨线路复用并作为行程用餐节点资源。"
|
||
)
|
||
|
||
relation_types = schema.setdefault("relation_types", {})
|
||
for rel_name in ["STOP_USES_DEFAULT_RESOURCE", "PRODUCT_USES_DEFAULT_RESOURCE"]:
|
||
props = list(relation_types[rel_name].get("properties") or [])
|
||
for prop in ["binding_id", "product_id", "day_index", "route_stop_id"]:
|
||
if prop not in props:
|
||
props.insert(0, prop)
|
||
relation_types[rel_name]["properties"] = props
|
||
relation_types["ATTRACTION_HAS_ITEM"]["definition"] = (
|
||
"主景区拥有的非 POI 服务/费用项,例如门票、小交通、保险、二消、视频讲解。费用项只挂主景区,不挂子景点。"
|
||
)
|
||
props = list(relation_types["ATTRACTION_HAS_ITEM"].get("properties") or [])
|
||
for prop in ["scenic_id", "raw_evidence", "normalized_type_reason"]:
|
||
if prop not in props:
|
||
props.append(prop)
|
||
relation_types["ATTRACTION_HAS_ITEM"]["properties"] = props
|
||
|
||
schema["query_recipes"] = {
|
||
"route_bus_like_view": "MATCH (p:TourProduct)-[:PRODUCT_HAS_DAY]->(d:ProductDay)-[:DAY_HAS_STOP]->(s:RouteStop) WHERE p.product_id=$product_id AND coalesce(s.business_filter,'ROUTE_NODE')='ROUTE_NODE' RETURN p,d,s ORDER BY d.day_no,s.order_no",
|
||
"default_resources_for_stop": "MATCH (s:RouteStop)-[r:STOP_USES_DEFAULT_RESOURCE]->(resource) WHERE s.stop_id=$stop_id RETURN s,r,labels(resource) AS resource_labels,resource ORDER BY r.default_rank",
|
||
"nearby_hotels_restaurants": "MATCH (s:RouteStop)-[:STOP_VISITS_ATTRACTION]->(a:ScenicAttraction)-[near:ATTRACTION_NEARBY_RESOURCE]->(resource) WHERE s.stop_id=$stop_id AND near.resource_type IN $types RETURN a,near,labels(resource) AS resource_labels,resource ORDER BY near.is_partner DESC, near.driving_minutes ASC, near.rank ASC",
|
||
"scenic_fee_items_by_scenic_id": "MATCH (a:ScenicAttraction)-[r:ATTRACTION_HAS_ITEM]->(item:TravelItem) WHERE a.attraction_id=$scenic_id OR item.scenic_id=$scenic_id RETURN a,r,item ORDER BY item.type,item.name",
|
||
"route_resource_bindings": "MATCH (p:TourProduct)-[:PRODUCT_HAS_DAY]->(d:ProductDay)-[:DAY_HAS_STOP]->(s:RouteStop)-[r:STOP_USES_DEFAULT_RESOURCE]->(resource) WHERE p.product_id=$product_id RETURN p,d,s,r,labels(resource) AS resource_labels,resource ORDER BY d.day_no,s.order_no,r.default_rank",
|
||
"hotel_room_price_rules": "MATCH (hotel:Hotel)-[:HOTEL_HAS_ROOM_TYPE]->(room:HotelRoomType)-[r:ROOM_TYPE_HAS_PRICE_RULE]->(price:ResourcePriceRule) WHERE hotel.hotel_id=$hotel_id RETURN hotel,room,r,price ORDER BY room.room_name,price.season,price.event_type",
|
||
"restaurant_price_rules": "MATCH (restaurant:Restaurant)-[r:RESTAURANT_HAS_PRICE_RULE]->(price:ResourcePriceRule) WHERE restaurant.restaurant_id=$restaurant_id RETURN restaurant,r,price ORDER BY price.event_type,price.date_range",
|
||
"service_price_rules": "MATCH (item:TravelItem)-[r:ITEM_HAS_PRICE_RULE]->(price:ResourcePriceRule) WHERE item.item_id=$item_id RETURN item,r,price ORDER BY price.event_type,price.date_range",
|
||
}
|
||
schema["design_principles"] = [
|
||
"路线像公交线路:TourProduct -> ProductDay -> RouteStop -> ScenicAttraction,站点顺序固定,不让推荐逻辑随意改线。",
|
||
"POI 与费用分层:Hotel、Restaurant、ScenicAttraction/SubAttraction 是有物理位置的 POI;Ticket、ScenicTransport、Insurance、Vehicle、GiftService、SmallGroupPackage 等是 TravelItem。",
|
||
"Activity 纠偏:扶梯、观光车、电瓶车、索道、游船等历史上误分到 Activity 的项目统一归一为 ScenicTransport;非交通体验可放 Service/GiftService/Shopping。",
|
||
"费用挂主景区:Ticket、ScenicTransport、Insurance、景区二消等 TravelItem 必须填 scenic_id,且只挂主景区,不挂子景点。",
|
||
"默认配置轻量化:不恢复 ItineraryResourceBinding 实体;默认/必付/可选状态由 STOP_USES_DEFAULT_RESOURCE/PRODUCT_USES_DEFAULT_RESOURCE 的关系属性表达。",
|
||
"资源替换路径:住宿/餐饮的替换推荐走 ScenicAttraction -> ATTRACTION_NEARBY_RESOURCE -> Hotel/Restaurant。",
|
||
"行政区轻量化:不恢复 AdministrativeRegion 实体;province/city/county/town/region_name/region_id 作为属性用于过滤和后续数据补全。",
|
||
"RouteStop 清洁:小吃街、夜市、长桌宴、餐馆等资源型文本不应进入路线站点;只保留出发/城市/景区/交通/返程等路线节点。",
|
||
"价格规则统一:TravelItem、HotelRoomType、Restaurant 的价格都通过 ResourcePriceRule 表达。",
|
||
"数据可追溯:保留 source_file/source_row/raw_evidence/data_quality,方便服务商补正。",
|
||
]
|
||
schema["not_in_scope_for_2_3"] = [
|
||
"CustomerSelection 暂不入图谱;客户最终选择放订单/会话侧。",
|
||
"AdministrativeRegion 暂不作为实体层级;当前用 region_id 和行政区属性过滤。",
|
||
"ItineraryResourceBinding 暂不作为实体;当前用关系属性表达绑定,后期订单级选择复杂后再升级。",
|
||
"SubAttraction 不承接费用项;费用项统一 scenic_id 挂主景区。",
|
||
"Inventory/InventorySnapshot 暂不作为实体;当前仅标记 inventory_status/requires_supplier_confirm。",
|
||
]
|
||
schema["revision_notes"] = [
|
||
"采纳 Hotel/Restaurant 独立 POI,并保留 2.2 的 HotelRoomType/RestaurantPriceRule 设计。",
|
||
"采纳 fee 加 scenic_id,景区费用项通过 ATTRACTION_HAS_ITEM 挂主景区。",
|
||
"采纳 Activity 纠偏:删除 TravelItem.type 中的 Activity,交通/游览类二消统一使用 ScenicTransport。",
|
||
"部分采纳 Binding 意见:不恢复 ItineraryResourceBinding 实体,但在默认资源关系上增加 binding_id/resource_type/resource_id/product_id/day_index/route_stop_id。",
|
||
"不采纳 AdministrativeRegion 实体化:当前业务优先查询路线、景区、酒店、餐饮和费用,行政区作为属性更利于减少关系数量。",
|
||
"数据管线下一步需要输出 route_product/day/stop、fee_item、hotel_poi、restaurant_poi、scenic_items_summary 等模板文件。",
|
||
]
|
||
return schema
|
||
|
||
|
||
def write_schema_files(schema: dict) -> dict[str, str]:
|
||
SCHEMA_DIR.mkdir(parents=True, exist_ok=True)
|
||
DOWNLOAD_DIR.mkdir(parents=True, exist_ok=True)
|
||
paths = {
|
||
"current_json": SCHEMA_DIR / "travel_agency_2_0_schema.current.json",
|
||
"current_dsl": SCHEMA_DIR / "travel_agency_2_0_schema.current.dsl.md",
|
||
"v_json": SCHEMA_DIR / "travel_agency_2_0_schema.v2_3.json",
|
||
"v_dsl": SCHEMA_DIR / "travel_agency_2_0_schema.v2_3.dsl.md",
|
||
"sample_json": SCHEMA_DIR / "travel_agency_2_0_schema.v2_3.original_sample.json",
|
||
"sample_dsl": SCHEMA_DIR / "travel_agency_2_0_schema.v2_3.original_sample.dsl.md",
|
||
}
|
||
json_text = json.dumps(schema, ensure_ascii=False, indent=2)
|
||
dsl_text = schema_to_dsl(schema)
|
||
for path in paths.values():
|
||
path.write_text(json_text if path.suffix == ".json" else dsl_text, encoding="utf-8")
|
||
shutil.copy2(path, DOWNLOAD_DIR / path.name)
|
||
readme = """# 旅行社2.0测试 Schema
|
||
|
||
当前 active 版本:`2.3`。
|
||
|
||
2.3 采纳内容:
|
||
- `Hotel`、`Restaurant` 独立 POI。
|
||
- `TravelItem` 只保留非 POI 服务/费用。
|
||
- `TravelItem.scenic_id` 挂主景区,景区费用项走 `ATTRACTION_HAS_ITEM`。
|
||
- `Activity` 不再作为核心类型;扶梯/观光车/电瓶车/索道/游船统一归 `ScenicTransport`。
|
||
- 默认资源绑定继续走关系属性,增加 `binding_id/resource_type/resource_id/product_id/day_index/route_stop_id`。
|
||
- 行政区不单独建实体,保留 `region_id` 与行政区属性。
|
||
"""
|
||
(SCHEMA_DIR / "README.md").write_text(readme, encoding="utf-8")
|
||
(DOWNLOAD_DIR / "README.md").write_text(readme, encoding="utf-8")
|
||
return {key: str(path) for key, path in paths.items()}
|
||
|
||
|
||
def publish_schema(schema: dict) -> int:
|
||
with psycopg.connect(settings.database_url, row_factory=dict_row) as conn:
|
||
with conn.cursor() as cur:
|
||
cur.execute(
|
||
f"""
|
||
UPDATE {settings.db_schema}.ontology_schemas
|
||
SET status='published', updated_at=now()
|
||
WHERE tenant_id=%s AND project_id=%s AND status='active'
|
||
""",
|
||
(TENANT_ID, PROJECT_ID),
|
||
)
|
||
cur.execute(
|
||
f"""
|
||
INSERT INTO {settings.db_schema}.ontology_schemas (
|
||
tenant_id, project_id, namespace, version, display_name, description,
|
||
status, schema_jsonb, created_by, published_by, published_at, updated_at
|
||
)
|
||
VALUES (%s,%s,%s,%s,%s,%s,'active',%s,%s,%s,now(),now())
|
||
RETURNING id
|
||
""",
|
||
(
|
||
TENANT_ID,
|
||
PROJECT_ID,
|
||
NAMESPACE,
|
||
5,
|
||
schema["display_name"],
|
||
schema["purpose"],
|
||
Jsonb(schema),
|
||
"codex",
|
||
"codex",
|
||
),
|
||
)
|
||
schema_id = cur.fetchone()["id"]
|
||
cur.execute(
|
||
f"""
|
||
UPDATE {settings.db_schema}.graph_releases
|
||
SET schema_id=%s,
|
||
graph_release_id=%s,
|
||
graph_name=%s,
|
||
status='active',
|
||
activated_at=now(),
|
||
updated_at=now(),
|
||
metadata_jsonb = coalesce(metadata_jsonb, '{{}}'::jsonb) || %s::jsonb
|
||
WHERE tenant_id=%s AND project_id=%s AND alias='active'
|
||
""",
|
||
(
|
||
schema_id,
|
||
"travel_agency_2_0_test_v2_3",
|
||
GRAPH_NAME,
|
||
Jsonb({"schema_version": "2.3", "schema_id": schema_id}),
|
||
TENANT_ID,
|
||
PROJECT_ID,
|
||
),
|
||
)
|
||
cur.execute(
|
||
f"""
|
||
INSERT INTO {settings.db_schema}.import_templates (
|
||
template_id, version, display_name, primary_entity, template_jsonb, status, updated_at
|
||
)
|
||
VALUES (%s, 4, %s, 'TourProduct', %s, 'active', now())
|
||
ON CONFLICT (template_id, version) DO UPDATE
|
||
SET display_name=EXCLUDED.display_name,
|
||
primary_entity=EXCLUDED.primary_entity,
|
||
template_jsonb=EXCLUDED.template_jsonb,
|
||
status=EXCLUDED.status,
|
||
updated_at=now()
|
||
""",
|
||
(
|
||
"travel_agency_2_0_fixed_route_nearby_v1",
|
||
"旅行社2.3固定线路 POI 与主景区费用导入模板",
|
||
Jsonb(
|
||
{
|
||
"schema_version": "2.3",
|
||
"entity_types": list(schema["entity_types"].keys()),
|
||
"relation_types": list(schema["relation_types"].keys()),
|
||
"notes": "Hotel/Restaurant 独立 POI;TravelItem 费用项必须挂 scenic_id;默认配置关系携带 binding_id/resource_type/resource_id。",
|
||
}
|
||
),
|
||
),
|
||
)
|
||
conn.commit()
|
||
return schema_id
|
||
|
||
|
||
def main() -> None:
|
||
schema = build_schema()
|
||
files = write_schema_files(schema)
|
||
schema_id = publish_schema(schema)
|
||
print(
|
||
json.dumps(
|
||
{
|
||
"schema_id": schema_id,
|
||
"schema_version": schema["version"],
|
||
"entities": list(schema["entity_types"].keys()),
|
||
"relations": list(schema["relation_types"].keys()),
|
||
"travel_item_types": schema["enums"]["TravelItem.type"],
|
||
"files": files,
|
||
},
|
||
ensure_ascii=False,
|
||
indent=2,
|
||
)
|
||
)
|
||
|
||
|
||
if __name__ == "__main__":
|
||
main()
|