Files
bxh/scripts/publish_travel_agency_2_2_schema.py

577 lines
25 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from __future__ import annotations
import json
import shutil
from copy import deepcopy
from pathlib import Path
import psycopg
from psycopg.rows import dict_row
from psycopg.types.json import Jsonb
from app.config import settings
from common_paths import PROJECT_ROOT, TRAVEL_KG_EXPORT_ROOT
PROJECT_ID = "travel_agency_2_0_test"
TENANT_ID = "travel_agency"
GRAPH_NAME = "travel_agency_2_0_test"
NAMESPACE = "travel_agency_2_0"
SCHEMA_DIR = PROJECT_ROOT / "schema搭建/travel_agency_2_0_test"
DOWNLOAD_DIR = TRAVEL_KG_EXPORT_ROOT / "travel_agency_2_0_test_旅行社2.0测试"
CURRENT_JSON = SCHEMA_DIR / "travel_agency_2_0_schema.current.json"
def schema_to_dsl(payload: dict) -> str:
lines: list[str] = [
"```text",
f"namespace {payload.get('namespace', NAMESPACE)}",
f"version {payload.get('version', '')}",
"",
"// 2.2Hotel/Restaurant 独立 POITravelItem 只保留非 POI 服务/费用;默认配置用 resource_type/resource_id 关系属性表达。",
]
for name, meta in payload.get("entity_types", {}).items():
lines.append("")
lines.append(f"entity {name} // {meta.get('definition', '')}")
if meta.get("primary_key"):
lines.append(f" primary_key {meta['primary_key']}")
for field in meta.get("fields", []):
lines.append(f" property {field}: Text")
for name, meta in payload.get("relation_types", {}).items():
lines.append("")
lines.append(
f"relation {name}: {meta.get('from', '')} -> {meta.get('to', '')} // {meta.get('definition', '')}"
)
for prop in meta.get("properties", []):
lines.append(f" property {prop}: Text")
lines.extend(["", "// design_principles"])
for item in payload.get("design_principles", []):
lines.append(f"- {item}")
lines.extend(["", "// revision_notes"])
for item in payload.get("revision_notes", []):
lines.append(f"- {item}")
lines.extend(["", "// query_recipes"])
for name, query in payload.get("query_recipes", {}).items():
lines.append(f"{name}:")
lines.append(query)
lines.append("```")
return "\n".join(lines) + "\n"
def build_schema() -> dict:
schema = deepcopy(json.loads(CURRENT_JSON.read_text(encoding="utf-8")))
schema["version"] = "2.2"
schema["display_name"] = "旅行社2.2固定线路 nearby POI 资源图谱 Schema"
schema["purpose"] = (
"面向旅行社固定线路与小包团销售路线像公交线路一样固定RouteStop 串联景区;"
"Hotel、Restaurant 独立为有物理地址和图片的 POI 实体;车辆、门票、小交通、保险、二消、活动、视频讲解等非 POI 服务/费用统一保留在 TravelItem"
"默认配置通过 RouteStop 到资源的关系属性表达 INCLUDED/OPTIONAL/MANDATORY可替换住宿/餐饮通过 ScenicAttraction 到 Hotel/Restaurant 的 nearby 关系检索;"
"景区门票、二消和活动通过 ScenicAttraction 到 TravelItem 的 HAS_ITEM 关系挂载;价格统一使用 ResourcePriceRule。"
)
enums = schema.setdefault("enums", {})
enums["TravelItem.type"] = [
"Vehicle",
"Ticket",
"ScenicTransport",
"Activity",
"Insurance",
"Shopping",
"Service",
"VideoGuide",
"Guide",
"Pickup",
"GiftService",
"Other",
]
enums["ResourceRefType"] = ["Hotel", "Restaurant", "TravelItem"]
enums["PriceRule.target_type"] = ["TravelItem", "Hotel", "HotelRoomType", "Restaurant"]
entity_types = schema.setdefault("entity_types", {})
entity_types["TravelItem"] = {
"definition": (
"非 POI 的统一服务/费用资源。车辆、门票、小交通、保险、景区二消、活动、购物、讲解、导游、接送等进入该类型;"
"Hotel/Restaurant 不再放入 TravelItem。"
),
"primary_key": "item_id",
"fields": [
"item_id",
"name",
"type",
"subtype",
"category",
"supplier_name",
"partner_status",
"business_status",
"description",
"price",
"price_text",
"adult_price",
"child_price",
"currency",
"unit",
"capacity",
"seat_count",
"safe_passenger_capacity",
"recommended_passenger_min",
"recommended_passenger_max",
"luggage_capacity",
"vehicle_group",
"vehicle_grade",
"vehicle_count_formula",
"service_rights",
"province",
"city",
"county",
"town",
"region_name",
"scenic_name",
"primary_image_url",
"image_urls",
"source_file",
"source_row",
"price_source",
"data_quality",
"policy_json",
"discount_policy",
"refund_policy",
"child_policy",
"elder_policy",
"military_policy",
"student_policy",
"mandatory_fee_policy",
"free_ticket_policy",
"replacement_policy",
"inventory_status",
"inventory_source",
"requires_supplier_confirm",
"supplier_confirm_notes",
],
}
entity_types["Hotel"] = {
"definition": "独立酒店 POI。酒店有物理地址、经纬度、图片、星级、房型和供应商合作状态可跨多条固定线路复用。",
"primary_key": "hotel_id",
"fields": [
"hotel_id",
"name",
"type",
"subtype",
"star_rating",
"hotel_grade",
"supplier_name",
"partner_status",
"business_status",
"description",
"address",
"province",
"city",
"county",
"town",
"region_name",
"nearby_scenic_name",
"lat",
"lng",
"contact_phone",
"facilities",
"features",
"breakfast_policy",
"room_type_summary",
"base_price_text",
"primary_image_url",
"image_urls",
"source_file",
"source_row",
"price_source",
"data_quality",
"policy_json",
"refund_policy",
"inventory_status",
"inventory_source",
"requires_supplier_confirm",
"supplier_confirm_notes",
],
}
entity_types["Restaurant"] = {
"definition": "独立餐厅 POI。餐厅有物理地址、菜系、餐标、容量和图片可作为行程用餐节点的默认或可替换资源。",
"primary_key": "restaurant_id",
"fields": [
"restaurant_id",
"name",
"type",
"subtype",
"cuisine_type",
"meal_type",
"supplier_name",
"partner_status",
"business_status",
"description",
"avg_price",
"price_text",
"currency",
"unit",
"capacity",
"private_room",
"signature_dishes",
"address",
"province",
"city",
"county",
"town",
"region_name",
"nearby_scenic_name",
"lat",
"lng",
"contact_phone",
"features",
"primary_image_url",
"image_urls",
"source_file",
"source_row",
"price_source",
"data_quality",
"policy_json",
"refund_policy",
"inventory_status",
"inventory_source",
"requires_supplier_confirm",
"supplier_confirm_notes",
],
}
entity_types["HotelRoomType"] = {
"definition": "酒店下的房型/用房口径,例如标间、大床、三人间、司陪房。房型必须挂到 Hotel不再挂 TravelItem。",
"primary_key": "room_id",
"fields": [
"room_id",
"hotel_id",
"hotel_name",
"room_name",
"room_category",
"bed_type",
"occupancy",
"breakfast_policy",
"raw_price_text",
"applicable_product_text",
"source_file",
"source_row",
"data_quality",
"inventory_status",
"inventory_source",
"requires_supplier_confirm",
"supplier_confirm_notes",
],
}
price_fields = list(entity_types["ResourcePriceRule"]["fields"])
for field in ["hotel_id", "restaurant_id", "restaurant_name", "target_name", "meal_type"]:
if field not in price_fields:
price_fields.append(field)
entity_types["ResourcePriceRule"] = {
"definition": "通用资源价格规则。可挂在 TravelItem、HotelRoomType 或 Restaurant 下,表达淡旺季、周末、节假日、人数阶梯、房型、车型、餐标、门票人群等价格规则。",
"primary_key": "price_rule_id",
"fields": price_fields,
}
order = [
"TourProduct",
"ProductDay",
"RouteStop",
"ScenicAttraction",
"SubAttraction",
"Hotel",
"HotelRoomType",
"Restaurant",
"TravelItem",
"ResourcePriceRule",
]
schema["entity_types"] = {name: entity_types[name] for name in order if name in entity_types}
common_default_props = [
"status",
"resource_type",
"resource_id",
"resource_role",
"meal_slot",
"included_in_base_price",
"extra_price",
"price_formula",
"replaceable",
"default_rank",
"notes",
"source_file",
"evidence_level",
]
schema["relation_types"] = {
"PRODUCT_HAS_DAY": schema["relation_types"]["PRODUCT_HAS_DAY"],
"DAY_HAS_STOP": schema["relation_types"]["DAY_HAS_STOP"],
"ROUTE_STOP_NEXT": schema["relation_types"]["ROUTE_STOP_NEXT"],
"STOP_VISITS_ATTRACTION": schema["relation_types"]["STOP_VISITS_ATTRACTION"],
"STOP_VISITS_SUB_ATTRACTION": schema["relation_types"]["STOP_VISITS_SUB_ATTRACTION"],
"SUB_ATTRACTION_BELONGS_TO_ATTRACTION": schema["relation_types"]["SUB_ATTRACTION_BELONGS_TO_ATTRACTION"],
"ATTRACTION_NEARBY_RESOURCE": {
"from": "ScenicAttraction",
"to": "Hotel|Restaurant",
"definition": "景区附近可替换/推荐的住宿和餐饮 POI 资源。只连接有物理地址的 Hotel、Restaurant。",
"properties": [
"resource_type",
"resource_id",
"distance_km",
"driving_minutes",
"walking_minutes",
"rank",
"is_partner",
"is_default_candidate",
"fit_reason",
"source",
"confidence",
"updated_at",
],
},
"ATTRACTION_HAS_ITEM": {
"from": "ScenicAttraction",
"to": "TravelItem",
"definition": "景区拥有的非 POI 服务/费用项,例如门票、小交通、保险、二消、活动、视频讲解。",
"properties": [
"status",
"item_role",
"included_in_base_price",
"mandatory",
"optional",
"notes",
"source_file",
"evidence_level",
],
},
"STOP_USES_DEFAULT_RESOURCE": {
"from": "RouteStop",
"to": "Hotel|Restaurant|TravelItem",
"definition": "路线停靠点默认使用/默认包含/必付/可选的资源绑定。通过 resource_type/resource_id 指向 Hotel、Restaurant 或 TravelItem不再把默认状态写入资源本体。",
"properties": common_default_props,
},
"PRODUCT_USES_DEFAULT_RESOURCE": {
"from": "TourProduct",
"to": "Hotel|Restaurant|TravelItem",
"definition": "产品级默认资源,主要用于整条线路车辆、接送、保险等不依赖单个站点的服务,也可记录产品级默认酒店或餐饮口径。",
"properties": common_default_props,
},
"HOTEL_HAS_ROOM_TYPE": {
"from": "Hotel",
"to": "HotelRoomType",
"definition": "酒店拥有房型/用房口径。",
"properties": ["source_file", "source_row"],
},
"ITEM_HAS_PRICE_RULE": {
"from": "TravelItem",
"to": "ResourcePriceRule",
"definition": "非 POI 服务/费用资源拥有价格规则,适用于车辆、门票、小交通、保险、景区二消、活动等。",
"properties": ["price_scope", "source_file", "source_row", "evidence_level"],
},
"ROOM_TYPE_HAS_PRICE_RULE": schema["relation_types"]["ROOM_TYPE_HAS_PRICE_RULE"],
"RESTAURANT_HAS_PRICE_RULE": {
"from": "Restaurant",
"to": "ResourcePriceRule",
"definition": "餐厅拥有餐标/套餐/人数阶梯等价格规则。",
"properties": ["price_scope", "source_file", "source_row", "evidence_level"],
},
}
schema["query_recipes"] = {
"route_bus_like_view": "MATCH (p:TourProduct)-[:PRODUCT_HAS_DAY]->(d:ProductDay)-[:DAY_HAS_STOP]->(s:RouteStop) WHERE p.product_id=$product_id RETURN p,d,s ORDER BY d.day_no,s.order_no",
"default_resources_for_stop": "MATCH (s:RouteStop)-[r:STOP_USES_DEFAULT_RESOURCE]->(resource) WHERE s.stop_id=$stop_id RETURN s,r,labels(resource) AS resource_labels,resource ORDER BY r.default_rank",
"nearby_hotels_restaurants": "MATCH (s:RouteStop)-[:STOP_VISITS_ATTRACTION]->(a:ScenicAttraction)-[near:ATTRACTION_NEARBY_RESOURCE]->(resource) WHERE s.stop_id=$stop_id AND near.resource_type IN $types RETURN a,near,labels(resource) AS resource_labels,resource ORDER BY near.is_partner DESC, near.driving_minutes ASC, near.rank ASC",
"attraction_fee_items": "MATCH (a:ScenicAttraction)-[r:ATTRACTION_HAS_ITEM]->(item:TravelItem) WHERE a.attraction_id=$attraction_id RETURN a,r,item ORDER BY r.item_role,item.type,item.name",
"service_price_rules": "MATCH (item:TravelItem)-[r:ITEM_HAS_PRICE_RULE]->(price:ResourcePriceRule) WHERE item.item_id=$item_id RETURN item,r,price ORDER BY price.event_type,price.date_range",
"hotel_room_price_rules": "MATCH (hotel:Hotel)-[:HOTEL_HAS_ROOM_TYPE]->(room:HotelRoomType)-[r:ROOM_TYPE_HAS_PRICE_RULE]->(price:ResourcePriceRule) WHERE hotel.hotel_id=$hotel_id RETURN hotel,room,r,price ORDER BY room.room_name,price.season,price.event_type",
"restaurant_price_rules": "MATCH (restaurant:Restaurant)-[r:RESTAURANT_HAS_PRICE_RULE]->(price:ResourcePriceRule) WHERE restaurant.restaurant_id=$restaurant_id RETURN restaurant,r,price ORDER BY price.event_type,price.date_range",
}
schema["design_principles"] = [
"路线固定TourProduct -> ProductDay -> RouteStop -> ScenicAttraction站点顺序不可被推荐逻辑随意改写。",
"POI 与费用分层Hotel、Restaurant、ScenicAttraction/SubAttraction 是有物理位置的 POI车辆、门票、小交通、保险、二消、活动、讲解等保留为 TravelItem。",
"默认配置轻量化:不建 ItineraryResourceBinding 实体RouteStop/TourProduct 通过 STOP_USES_DEFAULT_RESOURCE 或 PRODUCT_USES_DEFAULT_RESOURCE 关系属性表达 INCLUDED、OPTIONAL、MANDATORY。",
"附近资源池:景区只通过 ATTRACTION_NEARBY_RESOURCE 连接 Hotel/Restaurant后期可用高德车程补充 driving_minutes。",
"景区费用项:景区门票、小交通、保险、二消、活动、视频讲解通过 ATTRACTION_HAS_ITEM 连接 TravelItem不作为独立 POI。",
"行政区属性化province/city/county/town/region_name 写在景区、酒店、餐厅和资源实体上,避免行政区层级关系把图谱撑乱。",
"酒店价格细化Hotel -> HotelRoomType -> ResourcePriceRule酒店本体不揉入淡旺季和房型价格。",
"餐饮价格细化Restaurant -> ResourcePriceRule餐标、套餐和人数阶梯用价格规则表达。",
"规则属性化:第一阶段把儿童、老人、军人、退改、免票等规则写在 policy_json/discount_policy/refund_policy 中。",
"库存延后:当前图谱不承担实时库存交易,只标记 inventory_status/requires_supplier_confirm。",
]
schema["not_in_scope_for_2_2"] = [
"CustomerSelection 暂不入图谱;用户选择记录放业务订单系统或会话侧。",
"AdministrativeRegion 暂不作为实体层级;行政区先作为景区、酒店、餐厅和资源属性。",
"ItineraryResourceBinding 暂不作为实体;默认/必付/可选状态放在 STOP_USES_DEFAULT_RESOURCE/PRODUCT_USES_DEFAULT_RESOURCE 关系属性。",
"Ticket/Vehicle/Activity/Insurance/GiftService 不拆成 POI 实体,仍归 TravelItem。",
"Inventory/InventorySnapshot 暂不作为实体;当前仅标记 inventory_status/requires_supplier_confirm。",
]
schema["revision_notes"] = [
"2.2 将 Hotel、Restaurant 从 TravelItem 拆为独立 POI 实体,解决酒店房型、餐饮餐标、图片、地址和跨产品复用的维护问题。",
"TravelItem 改为非 POI 服务/费用资源池,去掉 Hotel/Restaurant 类型。",
"STOP_USES_DEFAULT_ITEM/PRODUCT_USES_DEFAULT_ITEM 升级为 STOP_USES_DEFAULT_RESOURCE/PRODUCT_USES_DEFAULT_RESOURCE通过 resource_type/resource_id 支持 Hotel、Restaurant、TravelItem。",
"ATTRACTION_NEARBY_ITEM 拆分语义:住宿/餐饮推荐走 ATTRACTION_NEARBY_RESOURCE景区费用项走 ATTRACTION_HAS_ITEM。",
"HotelRoomType 改挂 HotelRestaurant 可直接挂 ResourcePriceRuleResourcePriceRule 支持 TravelItem、HotelRoomType、Restaurant 等目标。",
]
return schema
def write_schema_files(schema: dict) -> dict[str, str]:
SCHEMA_DIR.mkdir(parents=True, exist_ok=True)
DOWNLOAD_DIR.mkdir(parents=True, exist_ok=True)
paths = {
"current_json": SCHEMA_DIR / "travel_agency_2_0_schema.current.json",
"current_dsl": SCHEMA_DIR / "travel_agency_2_0_schema.current.dsl.md",
"v_json": SCHEMA_DIR / "travel_agency_2_0_schema.v2_2.json",
"v_dsl": SCHEMA_DIR / "travel_agency_2_0_schema.v2_2.dsl.md",
"sample_json": SCHEMA_DIR / "travel_agency_2_0_schema.v2_2.original_sample.json",
"sample_dsl": SCHEMA_DIR / "travel_agency_2_0_schema.v2_2.original_sample.dsl.md",
}
json_text = json.dumps(schema, ensure_ascii=False, indent=2)
dsl_text = schema_to_dsl(schema)
for path in paths.values():
path.write_text(json_text if path.suffix == ".json" else dsl_text, encoding="utf-8")
shutil.copy2(path, DOWNLOAD_DIR / path.name)
readme_text = """# 旅行社2.0测试 Schema
当前 active 版本:`2.2`。
核心调整:
- `Hotel`、`Restaurant` 独立为 POI 实体,用于地址、图片、房型/餐标、合作状态和跨线路复用。
- `TravelItem` 只保留非 POI 服务/费用:车辆、门票、小交通、保险、二消、活动、购物、讲解、导游、接送等。
- 默认配置不建 `ItineraryResourceBinding` 实体,改用 `STOP_USES_DEFAULT_RESOURCE` / `PRODUCT_USES_DEFAULT_RESOURCE` 关系属性表达 `INCLUDED`、`OPTIONAL`、`MANDATORY`。
- 景区附近住宿/餐饮推荐走 `ATTRACTION_NEARBY_RESOURCE`;景区门票/二消/小交通走 `ATTRACTION_HAS_ITEM`。
- 酒店房型价格:`Hotel -> HotelRoomType -> ResourcePriceRule`;餐厅餐标价格:`Restaurant -> ResourcePriceRule`。
- 行政区仍然属性化,不单独建 `AdministrativeRegion` 实体。
文件:
- `travel_agency_2_0_schema.current.dsl.md`
- `travel_agency_2_0_schema.current.json`
- `travel_agency_2_0_schema.v2_2.dsl.md`
- `travel_agency_2_0_schema.v2_2.json`
"""
(SCHEMA_DIR / "README.md").write_text(readme_text, encoding="utf-8")
(DOWNLOAD_DIR / "README.md").write_text(readme_text, encoding="utf-8")
return {key: str(path) for key, path in paths.items()}
def publish_schema(schema: dict) -> int:
with psycopg.connect(settings.database_url, row_factory=dict_row) as conn:
with conn.cursor() as cur:
cur.execute(
f"""
UPDATE {settings.db_schema}.ontology_schemas
SET status='published', updated_at=now()
WHERE tenant_id=%s AND project_id=%s AND status='active'
""",
(TENANT_ID, PROJECT_ID),
)
cur.execute(
f"""
INSERT INTO {settings.db_schema}.ontology_schemas (
tenant_id, project_id, namespace, version, display_name, description,
status, schema_jsonb, created_by, published_by, published_at, updated_at
)
VALUES (%s,%s,%s,%s,%s,%s,'active',%s,%s,%s,now(),now())
RETURNING id
""",
(
TENANT_ID,
PROJECT_ID,
NAMESPACE,
4,
schema["display_name"],
schema["purpose"],
Jsonb(schema),
"codex",
"codex",
),
)
schema_id = cur.fetchone()["id"]
cur.execute(
f"""
UPDATE {settings.db_schema}.graph_releases
SET schema_id=%s,
graph_release_id=%s,
graph_name=%s,
status='active',
activated_at=now(),
updated_at=now(),
metadata_jsonb = coalesce(metadata_jsonb, '{{}}'::jsonb) || %s::jsonb
WHERE tenant_id=%s AND project_id=%s AND alias='active'
RETURNING id
""",
(
schema_id,
"travel_agency_2_0_test_v2_2",
GRAPH_NAME,
Jsonb({"schema_version": "2.2", "schema_id": schema_id}),
TENANT_ID,
PROJECT_ID,
),
)
if not cur.fetchone():
cur.execute(
f"""
INSERT INTO {settings.db_schema}.graph_releases (
tenant_id, project_id, graph_release_id, graph_name, alias, status,
schema_id, metadata_jsonb, created_by, activated_at, updated_at
) VALUES (%s,%s,%s,%s,'active','active',%s,%s,%s,now(),now())
""",
(
TENANT_ID,
PROJECT_ID,
"travel_agency_2_0_test_v2_2",
GRAPH_NAME,
schema_id,
Jsonb({"schema_version": "2.2", "schema_id": schema_id}),
"codex",
),
)
template_payload = Jsonb(
{
"schema_version": "2.2",
"entity_types": list(schema["entity_types"].keys()),
"relation_types": list(schema["relation_types"].keys()),
"notes": "Hotel/Restaurant 为 POI 实体TravelItem 为非 POI 服务/费用;默认配置关系使用 resource_type/resource_id。",
}
)
cur.execute(
f"""
INSERT INTO {settings.db_schema}.import_templates (
template_id, version, display_name, primary_entity, template_jsonb, status, updated_at
)
VALUES (%s, 3, %s, 'TourProduct', %s, 'active', now())
ON CONFLICT (template_id, version) DO UPDATE
SET display_name=EXCLUDED.display_name,
primary_entity=EXCLUDED.primary_entity,
template_jsonb=EXCLUDED.template_jsonb,
status=EXCLUDED.status,
updated_at=now()
""",
(
"travel_agency_2_0_fixed_route_nearby_v1",
"旅行社2.2固定线路 POI 资源导入模板",
template_payload,
),
)
conn.commit()
return schema_id
def main() -> None:
schema = build_schema()
files = write_schema_files(schema)
schema_id = publish_schema(schema)
print(
json.dumps(
{
"schema_id": schema_id,
"schema_version": schema["version"],
"entities": list(schema["entity_types"].keys()),
"relations": list(schema["relation_types"].keys()),
"files": files,
},
ensure_ascii=False,
indent=2,
)
)
if __name__ == "__main__":
main()