Initial travel knowledge graph release

This commit is contained in:
2026-06-09 09:56:26 +08:00
commit 5f061295d8
402 changed files with 103877 additions and 0 deletions

View File

@@ -0,0 +1,575 @@
from __future__ import annotations
import json
import shutil
from copy import deepcopy
from pathlib import Path
import psycopg
from psycopg.rows import dict_row
from psycopg.types.json import Jsonb
from app.config import settings
PROJECT_ID = "travel_agency_2_0_test"
TENANT_ID = "travel_agency"
GRAPH_NAME = "travel_agency_2_0_test"
NAMESPACE = "travel_agency_2_0"
SCHEMA_DIR = Path("/Users/xuexue/new2/schema搭建/travel_agency_2_0_test")
DOWNLOAD_DIR = Path("/Users/xuexue/Downloads/图谱数据/travel_agency_2_0_test_旅行社2.0测试")
CURRENT_JSON = SCHEMA_DIR / "travel_agency_2_0_schema.current.json"
def schema_to_dsl(payload: dict) -> str:
lines: list[str] = [
"```text",
f"namespace {payload.get('namespace', NAMESPACE)}",
f"version {payload.get('version', '')}",
"",
"// 2.2Hotel/Restaurant 独立 POITravelItem 只保留非 POI 服务/费用;默认配置用 resource_type/resource_id 关系属性表达。",
]
for name, meta in payload.get("entity_types", {}).items():
lines.append("")
lines.append(f"entity {name} // {meta.get('definition', '')}")
if meta.get("primary_key"):
lines.append(f" primary_key {meta['primary_key']}")
for field in meta.get("fields", []):
lines.append(f" property {field}: Text")
for name, meta in payload.get("relation_types", {}).items():
lines.append("")
lines.append(
f"relation {name}: {meta.get('from', '')} -> {meta.get('to', '')} // {meta.get('definition', '')}"
)
for prop in meta.get("properties", []):
lines.append(f" property {prop}: Text")
lines.extend(["", "// design_principles"])
for item in payload.get("design_principles", []):
lines.append(f"- {item}")
lines.extend(["", "// revision_notes"])
for item in payload.get("revision_notes", []):
lines.append(f"- {item}")
lines.extend(["", "// query_recipes"])
for name, query in payload.get("query_recipes", {}).items():
lines.append(f"{name}:")
lines.append(query)
lines.append("```")
return "\n".join(lines) + "\n"
def build_schema() -> dict:
schema = deepcopy(json.loads(CURRENT_JSON.read_text(encoding="utf-8")))
schema["version"] = "2.2"
schema["display_name"] = "旅行社2.2固定线路 nearby POI 资源图谱 Schema"
schema["purpose"] = (
"面向旅行社固定线路与小包团销售路线像公交线路一样固定RouteStop 串联景区;"
"Hotel、Restaurant 独立为有物理地址和图片的 POI 实体;车辆、门票、小交通、保险、二消、活动、视频讲解等非 POI 服务/费用统一保留在 TravelItem"
"默认配置通过 RouteStop 到资源的关系属性表达 INCLUDED/OPTIONAL/MANDATORY可替换住宿/餐饮通过 ScenicAttraction 到 Hotel/Restaurant 的 nearby 关系检索;"
"景区门票、二消和活动通过 ScenicAttraction 到 TravelItem 的 HAS_ITEM 关系挂载;价格统一使用 ResourcePriceRule。"
)
enums = schema.setdefault("enums", {})
enums["TravelItem.type"] = [
"Vehicle",
"Ticket",
"ScenicTransport",
"Activity",
"Insurance",
"Shopping",
"Service",
"VideoGuide",
"Guide",
"Pickup",
"GiftService",
"Other",
]
enums["ResourceRefType"] = ["Hotel", "Restaurant", "TravelItem"]
enums["PriceRule.target_type"] = ["TravelItem", "Hotel", "HotelRoomType", "Restaurant"]
entity_types = schema.setdefault("entity_types", {})
entity_types["TravelItem"] = {
"definition": (
"非 POI 的统一服务/费用资源。车辆、门票、小交通、保险、景区二消、活动、购物、讲解、导游、接送等进入该类型;"
"Hotel/Restaurant 不再放入 TravelItem。"
),
"primary_key": "item_id",
"fields": [
"item_id",
"name",
"type",
"subtype",
"category",
"supplier_name",
"partner_status",
"business_status",
"description",
"price",
"price_text",
"adult_price",
"child_price",
"currency",
"unit",
"capacity",
"seat_count",
"safe_passenger_capacity",
"recommended_passenger_min",
"recommended_passenger_max",
"luggage_capacity",
"vehicle_group",
"vehicle_grade",
"vehicle_count_formula",
"service_rights",
"province",
"city",
"county",
"town",
"region_name",
"scenic_name",
"primary_image_url",
"image_urls",
"source_file",
"source_row",
"price_source",
"data_quality",
"policy_json",
"discount_policy",
"refund_policy",
"child_policy",
"elder_policy",
"military_policy",
"student_policy",
"mandatory_fee_policy",
"free_ticket_policy",
"replacement_policy",
"inventory_status",
"inventory_source",
"requires_supplier_confirm",
"supplier_confirm_notes",
],
}
entity_types["Hotel"] = {
"definition": "独立酒店 POI。酒店有物理地址、经纬度、图片、星级、房型和供应商合作状态可跨多条固定线路复用。",
"primary_key": "hotel_id",
"fields": [
"hotel_id",
"name",
"type",
"subtype",
"star_rating",
"hotel_grade",
"supplier_name",
"partner_status",
"business_status",
"description",
"address",
"province",
"city",
"county",
"town",
"region_name",
"nearby_scenic_name",
"lat",
"lng",
"contact_phone",
"facilities",
"features",
"breakfast_policy",
"room_type_summary",
"base_price_text",
"primary_image_url",
"image_urls",
"source_file",
"source_row",
"price_source",
"data_quality",
"policy_json",
"refund_policy",
"inventory_status",
"inventory_source",
"requires_supplier_confirm",
"supplier_confirm_notes",
],
}
entity_types["Restaurant"] = {
"definition": "独立餐厅 POI。餐厅有物理地址、菜系、餐标、容量和图片可作为行程用餐节点的默认或可替换资源。",
"primary_key": "restaurant_id",
"fields": [
"restaurant_id",
"name",
"type",
"subtype",
"cuisine_type",
"meal_type",
"supplier_name",
"partner_status",
"business_status",
"description",
"avg_price",
"price_text",
"currency",
"unit",
"capacity",
"private_room",
"signature_dishes",
"address",
"province",
"city",
"county",
"town",
"region_name",
"nearby_scenic_name",
"lat",
"lng",
"contact_phone",
"features",
"primary_image_url",
"image_urls",
"source_file",
"source_row",
"price_source",
"data_quality",
"policy_json",
"refund_policy",
"inventory_status",
"inventory_source",
"requires_supplier_confirm",
"supplier_confirm_notes",
],
}
entity_types["HotelRoomType"] = {
"definition": "酒店下的房型/用房口径,例如标间、大床、三人间、司陪房。房型必须挂到 Hotel不再挂 TravelItem。",
"primary_key": "room_id",
"fields": [
"room_id",
"hotel_id",
"hotel_name",
"room_name",
"room_category",
"bed_type",
"occupancy",
"breakfast_policy",
"raw_price_text",
"applicable_product_text",
"source_file",
"source_row",
"data_quality",
"inventory_status",
"inventory_source",
"requires_supplier_confirm",
"supplier_confirm_notes",
],
}
price_fields = list(entity_types["ResourcePriceRule"]["fields"])
for field in ["hotel_id", "restaurant_id", "restaurant_name", "target_name", "meal_type"]:
if field not in price_fields:
price_fields.append(field)
entity_types["ResourcePriceRule"] = {
"definition": "通用资源价格规则。可挂在 TravelItem、HotelRoomType 或 Restaurant 下,表达淡旺季、周末、节假日、人数阶梯、房型、车型、餐标、门票人群等价格规则。",
"primary_key": "price_rule_id",
"fields": price_fields,
}
order = [
"TourProduct",
"ProductDay",
"RouteStop",
"ScenicAttraction",
"SubAttraction",
"Hotel",
"HotelRoomType",
"Restaurant",
"TravelItem",
"ResourcePriceRule",
]
schema["entity_types"] = {name: entity_types[name] for name in order if name in entity_types}
common_default_props = [
"status",
"resource_type",
"resource_id",
"resource_role",
"meal_slot",
"included_in_base_price",
"extra_price",
"price_formula",
"replaceable",
"default_rank",
"notes",
"source_file",
"evidence_level",
]
schema["relation_types"] = {
"PRODUCT_HAS_DAY": schema["relation_types"]["PRODUCT_HAS_DAY"],
"DAY_HAS_STOP": schema["relation_types"]["DAY_HAS_STOP"],
"ROUTE_STOP_NEXT": schema["relation_types"]["ROUTE_STOP_NEXT"],
"STOP_VISITS_ATTRACTION": schema["relation_types"]["STOP_VISITS_ATTRACTION"],
"STOP_VISITS_SUB_ATTRACTION": schema["relation_types"]["STOP_VISITS_SUB_ATTRACTION"],
"SUB_ATTRACTION_BELONGS_TO_ATTRACTION": schema["relation_types"]["SUB_ATTRACTION_BELONGS_TO_ATTRACTION"],
"ATTRACTION_NEARBY_RESOURCE": {
"from": "ScenicAttraction",
"to": "Hotel|Restaurant",
"definition": "景区附近可替换/推荐的住宿和餐饮 POI 资源。只连接有物理地址的 Hotel、Restaurant。",
"properties": [
"resource_type",
"resource_id",
"distance_km",
"driving_minutes",
"walking_minutes",
"rank",
"is_partner",
"is_default_candidate",
"fit_reason",
"source",
"confidence",
"updated_at",
],
},
"ATTRACTION_HAS_ITEM": {
"from": "ScenicAttraction",
"to": "TravelItem",
"definition": "景区拥有的非 POI 服务/费用项,例如门票、小交通、保险、二消、活动、视频讲解。",
"properties": [
"status",
"item_role",
"included_in_base_price",
"mandatory",
"optional",
"notes",
"source_file",
"evidence_level",
],
},
"STOP_USES_DEFAULT_RESOURCE": {
"from": "RouteStop",
"to": "Hotel|Restaurant|TravelItem",
"definition": "路线停靠点默认使用/默认包含/必付/可选的资源绑定。通过 resource_type/resource_id 指向 Hotel、Restaurant 或 TravelItem不再把默认状态写入资源本体。",
"properties": common_default_props,
},
"PRODUCT_USES_DEFAULT_RESOURCE": {
"from": "TourProduct",
"to": "Hotel|Restaurant|TravelItem",
"definition": "产品级默认资源,主要用于整条线路车辆、接送、保险等不依赖单个站点的服务,也可记录产品级默认酒店或餐饮口径。",
"properties": common_default_props,
},
"HOTEL_HAS_ROOM_TYPE": {
"from": "Hotel",
"to": "HotelRoomType",
"definition": "酒店拥有房型/用房口径。",
"properties": ["source_file", "source_row"],
},
"ITEM_HAS_PRICE_RULE": {
"from": "TravelItem",
"to": "ResourcePriceRule",
"definition": "非 POI 服务/费用资源拥有价格规则,适用于车辆、门票、小交通、保险、景区二消、活动等。",
"properties": ["price_scope", "source_file", "source_row", "evidence_level"],
},
"ROOM_TYPE_HAS_PRICE_RULE": schema["relation_types"]["ROOM_TYPE_HAS_PRICE_RULE"],
"RESTAURANT_HAS_PRICE_RULE": {
"from": "Restaurant",
"to": "ResourcePriceRule",
"definition": "餐厅拥有餐标/套餐/人数阶梯等价格规则。",
"properties": ["price_scope", "source_file", "source_row", "evidence_level"],
},
}
schema["query_recipes"] = {
"route_bus_like_view": "MATCH (p:TourProduct)-[:PRODUCT_HAS_DAY]->(d:ProductDay)-[:DAY_HAS_STOP]->(s:RouteStop) WHERE p.product_id=$product_id RETURN p,d,s ORDER BY d.day_no,s.order_no",
"default_resources_for_stop": "MATCH (s:RouteStop)-[r:STOP_USES_DEFAULT_RESOURCE]->(resource) WHERE s.stop_id=$stop_id RETURN s,r,labels(resource) AS resource_labels,resource ORDER BY r.default_rank",
"nearby_hotels_restaurants": "MATCH (s:RouteStop)-[:STOP_VISITS_ATTRACTION]->(a:ScenicAttraction)-[near:ATTRACTION_NEARBY_RESOURCE]->(resource) WHERE s.stop_id=$stop_id AND near.resource_type IN $types RETURN a,near,labels(resource) AS resource_labels,resource ORDER BY near.is_partner DESC, near.driving_minutes ASC, near.rank ASC",
"attraction_fee_items": "MATCH (a:ScenicAttraction)-[r:ATTRACTION_HAS_ITEM]->(item:TravelItem) WHERE a.attraction_id=$attraction_id RETURN a,r,item ORDER BY r.item_role,item.type,item.name",
"service_price_rules": "MATCH (item:TravelItem)-[r:ITEM_HAS_PRICE_RULE]->(price:ResourcePriceRule) WHERE item.item_id=$item_id RETURN item,r,price ORDER BY price.event_type,price.date_range",
"hotel_room_price_rules": "MATCH (hotel:Hotel)-[:HOTEL_HAS_ROOM_TYPE]->(room:HotelRoomType)-[r:ROOM_TYPE_HAS_PRICE_RULE]->(price:ResourcePriceRule) WHERE hotel.hotel_id=$hotel_id RETURN hotel,room,r,price ORDER BY room.room_name,price.season,price.event_type",
"restaurant_price_rules": "MATCH (restaurant:Restaurant)-[r:RESTAURANT_HAS_PRICE_RULE]->(price:ResourcePriceRule) WHERE restaurant.restaurant_id=$restaurant_id RETURN restaurant,r,price ORDER BY price.event_type,price.date_range",
}
schema["design_principles"] = [
"路线固定TourProduct -> ProductDay -> RouteStop -> ScenicAttraction站点顺序不可被推荐逻辑随意改写。",
"POI 与费用分层Hotel、Restaurant、ScenicAttraction/SubAttraction 是有物理位置的 POI车辆、门票、小交通、保险、二消、活动、讲解等保留为 TravelItem。",
"默认配置轻量化:不建 ItineraryResourceBinding 实体RouteStop/TourProduct 通过 STOP_USES_DEFAULT_RESOURCE 或 PRODUCT_USES_DEFAULT_RESOURCE 关系属性表达 INCLUDED、OPTIONAL、MANDATORY。",
"附近资源池:景区只通过 ATTRACTION_NEARBY_RESOURCE 连接 Hotel/Restaurant后期可用高德车程补充 driving_minutes。",
"景区费用项:景区门票、小交通、保险、二消、活动、视频讲解通过 ATTRACTION_HAS_ITEM 连接 TravelItem不作为独立 POI。",
"行政区属性化province/city/county/town/region_name 写在景区、酒店、餐厅和资源实体上,避免行政区层级关系把图谱撑乱。",
"酒店价格细化Hotel -> HotelRoomType -> ResourcePriceRule酒店本体不揉入淡旺季和房型价格。",
"餐饮价格细化Restaurant -> ResourcePriceRule餐标、套餐和人数阶梯用价格规则表达。",
"规则属性化:第一阶段把儿童、老人、军人、退改、免票等规则写在 policy_json/discount_policy/refund_policy 中。",
"库存延后:当前图谱不承担实时库存交易,只标记 inventory_status/requires_supplier_confirm。",
]
schema["not_in_scope_for_2_2"] = [
"CustomerSelection 暂不入图谱;用户选择记录放业务订单系统或会话侧。",
"AdministrativeRegion 暂不作为实体层级;行政区先作为景区、酒店、餐厅和资源属性。",
"ItineraryResourceBinding 暂不作为实体;默认/必付/可选状态放在 STOP_USES_DEFAULT_RESOURCE/PRODUCT_USES_DEFAULT_RESOURCE 关系属性。",
"Ticket/Vehicle/Activity/Insurance/GiftService 不拆成 POI 实体,仍归 TravelItem。",
"Inventory/InventorySnapshot 暂不作为实体;当前仅标记 inventory_status/requires_supplier_confirm。",
]
schema["revision_notes"] = [
"2.2 将 Hotel、Restaurant 从 TravelItem 拆为独立 POI 实体,解决酒店房型、餐饮餐标、图片、地址和跨产品复用的维护问题。",
"TravelItem 改为非 POI 服务/费用资源池,去掉 Hotel/Restaurant 类型。",
"STOP_USES_DEFAULT_ITEM/PRODUCT_USES_DEFAULT_ITEM 升级为 STOP_USES_DEFAULT_RESOURCE/PRODUCT_USES_DEFAULT_RESOURCE通过 resource_type/resource_id 支持 Hotel、Restaurant、TravelItem。",
"ATTRACTION_NEARBY_ITEM 拆分语义:住宿/餐饮推荐走 ATTRACTION_NEARBY_RESOURCE景区费用项走 ATTRACTION_HAS_ITEM。",
"HotelRoomType 改挂 HotelRestaurant 可直接挂 ResourcePriceRuleResourcePriceRule 支持 TravelItem、HotelRoomType、Restaurant 等目标。",
]
return schema
def write_schema_files(schema: dict) -> dict[str, str]:
SCHEMA_DIR.mkdir(parents=True, exist_ok=True)
DOWNLOAD_DIR.mkdir(parents=True, exist_ok=True)
paths = {
"current_json": SCHEMA_DIR / "travel_agency_2_0_schema.current.json",
"current_dsl": SCHEMA_DIR / "travel_agency_2_0_schema.current.dsl.md",
"v_json": SCHEMA_DIR / "travel_agency_2_0_schema.v2_2.json",
"v_dsl": SCHEMA_DIR / "travel_agency_2_0_schema.v2_2.dsl.md",
"sample_json": SCHEMA_DIR / "travel_agency_2_0_schema.v2_2.original_sample.json",
"sample_dsl": SCHEMA_DIR / "travel_agency_2_0_schema.v2_2.original_sample.dsl.md",
}
json_text = json.dumps(schema, ensure_ascii=False, indent=2)
dsl_text = schema_to_dsl(schema)
for path in paths.values():
path.write_text(json_text if path.suffix == ".json" else dsl_text, encoding="utf-8")
shutil.copy2(path, DOWNLOAD_DIR / path.name)
readme_text = """# 旅行社2.0测试 Schema
当前 active 版本:`2.2`。
核心调整:
- `Hotel`、`Restaurant` 独立为 POI 实体,用于地址、图片、房型/餐标、合作状态和跨线路复用。
- `TravelItem` 只保留非 POI 服务/费用:车辆、门票、小交通、保险、二消、活动、购物、讲解、导游、接送等。
- 默认配置不建 `ItineraryResourceBinding` 实体,改用 `STOP_USES_DEFAULT_RESOURCE` / `PRODUCT_USES_DEFAULT_RESOURCE` 关系属性表达 `INCLUDED`、`OPTIONAL`、`MANDATORY`。
- 景区附近住宿/餐饮推荐走 `ATTRACTION_NEARBY_RESOURCE`;景区门票/二消/小交通走 `ATTRACTION_HAS_ITEM`。
- 酒店房型价格:`Hotel -> HotelRoomType -> ResourcePriceRule`;餐厅餐标价格:`Restaurant -> ResourcePriceRule`。
- 行政区仍然属性化,不单独建 `AdministrativeRegion` 实体。
文件:
- `travel_agency_2_0_schema.current.dsl.md`
- `travel_agency_2_0_schema.current.json`
- `travel_agency_2_0_schema.v2_2.dsl.md`
- `travel_agency_2_0_schema.v2_2.json`
"""
(SCHEMA_DIR / "README.md").write_text(readme_text, encoding="utf-8")
(DOWNLOAD_DIR / "README.md").write_text(readme_text, encoding="utf-8")
return {key: str(path) for key, path in paths.items()}
def publish_schema(schema: dict) -> int:
with psycopg.connect(settings.database_url, row_factory=dict_row) as conn:
with conn.cursor() as cur:
cur.execute(
f"""
UPDATE {settings.db_schema}.ontology_schemas
SET status='published', updated_at=now()
WHERE tenant_id=%s AND project_id=%s AND status='active'
""",
(TENANT_ID, PROJECT_ID),
)
cur.execute(
f"""
INSERT INTO {settings.db_schema}.ontology_schemas (
tenant_id, project_id, namespace, version, display_name, description,
status, schema_jsonb, created_by, published_by, published_at, updated_at
)
VALUES (%s,%s,%s,%s,%s,%s,'active',%s,%s,%s,now(),now())
RETURNING id
""",
(
TENANT_ID,
PROJECT_ID,
NAMESPACE,
4,
schema["display_name"],
schema["purpose"],
Jsonb(schema),
"codex",
"codex",
),
)
schema_id = cur.fetchone()["id"]
cur.execute(
f"""
UPDATE {settings.db_schema}.graph_releases
SET schema_id=%s,
graph_release_id=%s,
graph_name=%s,
status='active',
activated_at=now(),
updated_at=now(),
metadata_jsonb = coalesce(metadata_jsonb, '{{}}'::jsonb) || %s::jsonb
WHERE tenant_id=%s AND project_id=%s AND alias='active'
RETURNING id
""",
(
schema_id,
"travel_agency_2_0_test_v2_2",
GRAPH_NAME,
Jsonb({"schema_version": "2.2", "schema_id": schema_id}),
TENANT_ID,
PROJECT_ID,
),
)
if not cur.fetchone():
cur.execute(
f"""
INSERT INTO {settings.db_schema}.graph_releases (
tenant_id, project_id, graph_release_id, graph_name, alias, status,
schema_id, metadata_jsonb, created_by, activated_at, updated_at
) VALUES (%s,%s,%s,%s,'active','active',%s,%s,%s,now(),now())
""",
(
TENANT_ID,
PROJECT_ID,
"travel_agency_2_0_test_v2_2",
GRAPH_NAME,
schema_id,
Jsonb({"schema_version": "2.2", "schema_id": schema_id}),
"codex",
),
)
template_payload = Jsonb(
{
"schema_version": "2.2",
"entity_types": list(schema["entity_types"].keys()),
"relation_types": list(schema["relation_types"].keys()),
"notes": "Hotel/Restaurant 为 POI 实体TravelItem 为非 POI 服务/费用;默认配置关系使用 resource_type/resource_id。",
}
)
cur.execute(
f"""
INSERT INTO {settings.db_schema}.import_templates (
template_id, version, display_name, primary_entity, template_jsonb, status, updated_at
)
VALUES (%s, 3, %s, 'TourProduct', %s, 'active', now())
ON CONFLICT (template_id, version) DO UPDATE
SET display_name=EXCLUDED.display_name,
primary_entity=EXCLUDED.primary_entity,
template_jsonb=EXCLUDED.template_jsonb,
status=EXCLUDED.status,
updated_at=now()
""",
(
"travel_agency_2_0_fixed_route_nearby_v1",
"旅行社2.2固定线路 POI 资源导入模板",
template_payload,
),
)
conn.commit()
return schema_id
def main() -> None:
schema = build_schema()
files = write_schema_files(schema)
schema_id = publish_schema(schema)
print(
json.dumps(
{
"schema_id": schema_id,
"schema_version": schema["version"],
"entities": list(schema["entity_types"].keys()),
"relations": list(schema["relation_types"].keys()),
"files": files,
},
ensure_ascii=False,
indent=2,
)
)
if __name__ == "__main__":
main()