Initial travel knowledge graph release

This commit is contained in:
2026-06-09 09:56:26 +08:00
commit 5f061295d8
402 changed files with 103877 additions and 0 deletions

View File

@@ -0,0 +1,374 @@
{
"schema_id": "scenic_spot_schema_v0_3",
"schema_name": "通用景区知识图谱 Schema v0.3",
"status": "draft_for_extraction_test",
"target_coverage": "覆盖百度百科/高德 POI/人工材料中约 80% 的通用景区知识,差异化景区通过 SchemaGap 后续扩展",
"principles": [
"稳定高频事实作为节点属性,例如地址、开放时间、门票、等级、建议游玩时长、经纬度、来源 URL、照片 URL",
"实体之间的语义联系作为关系,例如景区位于区域、包含景点、关联历史事件、关联人物、附近景点、附近交通",
"不稳定、长尾、争议或新字段先进入 Statement/Attribute经过审核后再决定是否升级为正式字段或关系",
"普通展示照片回填到最具体实体的 photo_urls/cover_image_url导游图、导览图、线路图、全景图等功能型媒体作为 MediaAsset 独立保存 media_role并挂到景区主体的 guide_map_urls/route_map_urls/panorama_urls",
"所有抽取结果必须带 evidence_quote、source_name、source_url、source_section、confidence方便人工复核和溯源",
"空间能力不放在自然语言抽取里临时生成,必须在入图时统一写入 lat/lng/adcode/h3_r6-h3_r10/GeoCell 关系",
"Event 使用统一 Event 节点,必须同时保存 event_category/event_subtype/event_type/start_time_norm/end_time_norm/date_granularityevent_type 仅兼容旧系统,查询优先使用分类和时间字段"
],
"node_types": {
"ScenicArea": {
"description": "景区/旅游目的地主体,例如花溪公园、梵净山、青岩古镇、遵义会议会址",
"required": ["entity_id", "name", "entity_type", "source_name", "source_url"],
"properties": [
"entity_id",
"name",
"canonical_name",
"aliases",
"foreign_name",
"entity_type",
"scenic_category",
"scenic_level",
"description",
"reputation",
"country",
"province",
"city",
"district",
"address",
"location_text",
"lng",
"lat",
"adcode",
"h3_r6",
"h3_r7",
"h3_r8",
"h3_r9",
"h3_r10",
"climate",
"area_size",
"altitude",
"terrain",
"water_system",
"opening_hours",
"ticket_price",
"suggested_duration",
"best_season",
"famous_spots_text",
"nearby_attractions_text",
"service_features",
"cover_image_url",
"photo_urls",
"guide_map_urls",
"route_map_urls",
"panorama_urls",
"source_name",
"source_url",
"crawl_time",
"last_updated",
"confidence",
"review_status"
]
},
"Attraction": {
"description": "景区内部可游览、可搜索或可作为路径端点的景点节点,例如百步桥、麟山、金顶、蘑菇石、大门、码头、观景台。自然景观、文化点、入口等都通过 category/spot_type 区分,路径 from/to 统一指向 Attraction",
"required": ["entity_id", "name", "parent_name", "source_name", "source_url"],
"properties": [
"entity_id",
"name",
"aliases",
"parent_name",
"category",
"spot_type",
"description",
"location_text",
"lng",
"lat",
"open_time",
"close_time",
"ticket_note",
"extra_ticket_fen",
"extra_ticket_text",
"ticket_included",
"visit_duration_min",
"is_active",
"story",
"source_name",
"source_url",
"source_section",
"cover_image_url",
"photo_urls",
"evidence_quote",
"confidence",
"review_status"
]
},
"Area": {
"description": "行政区或地理区域,例如贵州省、贵阳市、花溪区、江口县",
"properties": ["entity_id", "name", "area_level", "adcode", "parent_area", "lng", "lat", "source_name", "source_url"]
},
"Facility": {
"description": "旅游服务与基础设施,例如游客中心、停车场、售票处、厕所、摆渡车。游客会作为景点游览或路径端点的桥、亭、山、洲、湖、旧居、纪念墓、入口和码头优先归为 Attraction",
"properties": ["entity_id", "name", "facility_type", "description", "lng", "lat", "source_name", "source_url", "cover_image_url", "photo_urls", "confidence"]
},
"TransitFacility": {
"description": "景区周边交通设施,例如公交站、地铁站、火车站、客运站、停车场入口",
"properties": ["entity_id", "name", "transit_type", "line_names", "distance_text", "lng", "lat", "source_name", "source_url", "confidence"]
},
"Specialty": {
"description": "地方风味、小吃、特产或体验项目,例如丝娃娃、洋芋粑、凉粉、凉面。用于回答景区附近有什么特色吃的",
"properties": ["entity_id", "name", "specialty_type", "description", "source_name", "source_url", "source_section", "evidence_quote", "confidence"]
},
"BusLine": {
"description": "公交、中巴、大巴、地铁等公共交通线路例如90路、89路、109路、201路。入图前优先与现有公交图谱 BusLine 按线路名和城市对齐",
"properties": ["line_id", "name", "line_name", "line_type", "origin_name", "destination_name", "stop_names", "source_name", "source_url", "source_section", "confidence"]
},
"Person": {
"description": "与景区历史、文化、建设、游历相关的人物",
"properties": ["entity_id", "name", "person_type", "description", "source_name", "source_url", "evidence_quote", "confidence"]
},
"Organization": {
"description": "管理单位、建设单位、保护机构、旅游公司、宗教组织等",
"properties": ["entity_id", "name", "org_type", "description", "source_name", "source_url", "confidence"]
},
"Event": {
"description": "景区相关历史、荣誉、文化、自然、运营、交通事件。统一用 Event 节点承载event_category 用于聚合event_subtype 用于精准查询details 保存子类专属字段",
"required": ["event_id", "name", "event_category", "event_subtype", "occurred_at_text", "source_name", "source_url"],
"properties": [
"event_id",
"name",
"event_category",
"event_subtype",
"event_type",
"occurred_at_text",
"occurred_at_norm",
"start_time_norm",
"end_time_norm",
"date_granularity",
"dynasty",
"century",
"description",
"location_name",
"participants",
"details",
"source_name",
"source_url",
"source_section",
"evidence_quote",
"confidence",
"review_status"
]
},
"Concept": {
"description": "可用于检索、推荐和解释的主题概念,例如历史文化、夜游、自然生态、喀斯特地貌、红色旅游、亲子游",
"properties": ["concept_id", "name", "concept_type", "description", "source_name", "source_url", "evidence_quote", "confidence"]
},
"RouteTemplate": {
"description": "景区内部游览路线或人工策划玩法。百科交通线路不要硬造 RouteTemplate/乘车点,应抽明确线路为 BusLine 并用 ACCESSIBLE_BY 关联景区",
"properties": ["route_id", "name", "route_type", "description", "origin_names", "destination_name", "line_names", "fare_text", "duration_text", "road_names", "stops", "source_name", "source_url", "source_section", "evidence_quote", "confidence"]
},
"TransportMode": {
"description": "景区内部到达方式字典,例如步行、观光车、摆渡船、索道、景区电梯。用于统一图标、默认是否免费和查询筛选",
"properties": ["mode_id", "code", "name", "icon", "typical_is_free", "description", "is_active"]
},
"RouteSegment": {
"description": "景区内部固定通行路段,例如从大门到百步桥、从百步桥到东舍。保存到达方式、时间、费用、季节性和实测来源,用于游客问答和路径规划,不全量两两生成",
"properties": ["segment_id", "name", "scenic_area_id", "from_entity_id", "to_entity_id", "transport_mode", "transport_mode_id", "duration_min", "duration_max", "duration_text", "distance_m", "cost_fen", "cost_text", "cost_in_ticket", "is_bidirectional", "season_start", "season_end", "weather_restrict", "sort_order", "route_steps", "route_geometry", "difficulty", "accessibility_note", "verified_by", "verified_at", "source_type", "source_name", "source_url", "source_section", "evidence_quote", "confidence", "review_status", "is_active"]
},
"PathSchedule": {
"description": "观光车、摆渡船、索道等非步行路径的班次或开放时段。步行路径通常不需要班次节点",
"properties": ["schedule_id", "path_id", "schedule_type", "interval_min", "first_at", "last_at", "capacity", "season_start", "season_end", "note", "source_name", "source_url", "confidence", "is_active"]
},
"MediaAsset": {
"description": "图片、视频或页面媒体资源。媒体不是景点实体本身,必须通过 owner_entity_id/HAS_MEDIA 挂到最具体实体;景点照片不能默认挂到景区主体",
"properties": ["media_id", "url", "media_type", "media_role", "caption", "owner_entity_id", "source_name", "source_url", "source_section", "crawl_time", "confidence"]
},
"SourceDocument": {
"description": "来源文档或网页,用于整体溯源",
"properties": ["source_id", "source_name", "source_url", "title", "crawl_time", "content_hash", "publisher"]
},
"Statement": {
"description": "候选事实层,承接长尾属性、待审核事实和 SchemaGap",
"properties": [
"statement_id",
"subject_id",
"predicate",
"object_id",
"object_value",
"object_type",
"source_name",
"source_url",
"source_section",
"evidence_quote",
"confidence",
"review_status"
]
}
},
"relation_types": {
"LOCATED_IN": {"source": ["ScenicArea", "Attraction", "Facility", "TransitFacility"], "target": ["Area", "ScenicArea"], "description": "实体位于行政区、片区或景区内部"},
"PART_OF": {"source": ["Attraction", "Facility"], "target": ["ScenicArea"], "description": "景点或设施属于某景区"},
"HAS_PART": {"source": ["ScenicArea"], "target": ["Attraction", "Facility"], "description": "景区包含景点、入口、自然景观、文化点、官方服务点或设施"},
"HAS_NATURAL_FEATURE": {"source": ["ScenicArea"], "target": ["Attraction"], "description": "景区具有自然地理景观,目标 Attraction.category=natural_feature"},
"HAS_CULTURAL_SITE": {"source": ["ScenicArea"], "target": ["Attraction"], "description": "景区具有文化/历史/宗教/纪念类点位,目标 Attraction.category=cultural_site"},
"HAS_FACILITY": {"source": ["ScenicArea"], "target": ["Facility"], "description": "景区具有服务设施或基础设施"},
"HAS_EVENT": {"source": ["ScenicArea", "Attraction", "Person", "Organization"], "target": ["Event"], "description": "主体关联某事件"},
"EVENT_AT": {"source": ["Event"], "target": ["ScenicArea", "Attraction", "Area"], "description": "事件发生于某地点"},
"INVOLVES": {"source": ["Event"], "target": ["Person", "Organization", "ScenicArea"], "description": "事件涉及人物、组织或景区"},
"PARTICIPATED_IN": {"source": ["Person", "Organization"], "target": ["Event"], "description": "人物或组织参与某事件,常用于名人到访、建设、节庆、保护、影视取景"},
"AWARDED_BY": {"source": ["Event"], "target": ["Organization"], "description": "荣誉/评级/文保认定事件由某机构颁发或公布"},
"ORGANIZED_BY": {"source": ["Event"], "target": ["Organization"], "description": "节庆、演艺、展览等文化事件由某组织举办"},
"PRECEDED": {"source": ["Event"], "target": ["Event"], "description": "事件发生时间早于另一事件,用于历史时间线和流程查询"},
"PART_OF_EVENT": {"source": ["Event"], "target": ["Event"], "description": "子事件属于大型事件或阶段性事件"},
"ASSOCIATED_WITH_PERSON": {"source": ["ScenicArea", "Attraction", "Event"], "target": ["Person"], "description": "景区/景点/事件与人物存在到访、题词、建设、纪念等关联"},
"MANAGED_BY": {"source": ["ScenicArea", "Attraction"], "target": ["Organization"], "description": "景区由某组织管理或运营"},
"HAS_CONCEPT": {"source": ["ScenicArea", "Attraction", "Event"], "target": ["Concept"], "description": "主体具有某主题概念"},
"HAS_ROUTE": {"source": ["ScenicArea"], "target": ["RouteTemplate"], "description": "景区有内部游览路线或人工策划玩法"},
"ROUTE_STARTS_AT": {"source": ["RouteTemplate"], "target": ["TransitFacility", "Area"], "description": "路线从某乘车点、站点或区域出发"},
"ROUTE_ENDS_AT": {"source": ["RouteTemplate"], "target": ["ScenicArea", "Attraction"], "description": "路线到达某景区或景点"},
"ROUTE_USES_LINE": {"source": ["RouteTemplate"], "target": ["BusLine"], "description": "路线可使用某公交/中巴/大巴/地铁线路"},
"ACCESSIBLE_BY": {"source": ["ScenicArea", "Attraction"], "target": ["BusLine"], "description": "景区可由某公交/中巴/大巴线路经过或到达,需与既有 BusLine 实体对齐"},
"STOPS_AT": {"source": ["BusLine"], "target": ["TransitFacility"], "description": "交通线路停靠某站点"},
"NEARBY_ATTRACTION": {"source": ["ScenicArea"], "target": ["ScenicArea", "Attraction"], "description": "附近或联动游览景点"},
"HAS_ENTRANCE": {"source": ["ScenicArea"], "target": ["Attraction"], "description": "景区具有某入口或门区,目标 Attraction.category=entrance_gate"},
"NEARBY_SERVICE": {"source": ["ScenicArea", "Attraction"], "target": ["Attraction"], "description": "景区、景点或入口附近存在官方材料明确推荐的服务地点,目标 Attraction.category=nearby_service"},
"HAS_ROUTE_SEGMENT": {"source": ["ScenicArea"], "target": ["RouteSegment"], "description": "景区拥有一段已知内部通行路段或候选路段,对应关系型模型 attraction_path"},
"USES_TRANSPORT_MODE": {"source": ["RouteSegment"], "target": ["TransportMode"], "description": "景区内部通行路段使用某种到达方式"},
"SEGMENT_STARTS_AT": {"source": ["RouteSegment"], "target": ["Attraction"], "description": "路线段起点,等价于 attraction_path.from_id"},
"SEGMENT_ENDS_AT": {"source": ["RouteSegment"], "target": ["Attraction"], "description": "路线段终点,等价于 attraction_path.to_id"},
"HAS_SCHEDULE": {"source": ["RouteSegment"], "target": ["PathSchedule"], "description": "观光车、摆渡船、索道等路段具有班次或开放时段"},
"SCENIC_PATH_TO": {"source": ["Attraction"], "target": ["Attraction"], "description": "景区内部景点之间可通行,对应 attraction_path 的快速查询边;关系属性保存 transport_mode/duration_min/duration_max/cost_fen/cost_text/is_bidirectional/segment_id。只存官方路线、实测或高价值相邻路段任意两点通过图最短路计算"},
"HAS_SPECIALTY": {"source": ["Attraction", "ScenicArea"], "target": ["Specialty"], "description": "地点具有某类地方小吃、特产或体验"},
"NEAR_TRANSIT": {"source": ["ScenicArea", "Attraction"], "target": ["TransitFacility"], "description": "景区附近交通设施"},
"HAS_MEDIA": {"source": ["ScenicArea", "Attraction", "Facility"], "target": ["MediaAsset"], "description": "实体关联照片、视频或媒体"},
"MENTIONED_IN": {"source": ["ScenicArea", "Attraction", "Event", "Concept", "Statement"], "target": ["SourceDocument"], "description": "事实或实体来自某来源文档"},
"SAME_AS": {"source": ["ScenicArea", "Attraction", "Area"], "target": ["ScenicArea", "Attraction", "Area"], "description": "实体对齐关系,避免同一景点重复入图"},
"IN_H3_R9": {"source": ["ScenicArea", "Attraction", "Facility", "TransitFacility"], "target": ["GeoCell"], "description": "空间索引关系,用于附近召回"}
},
"event_taxonomy": {
"event_category": {
"HISTORICAL": "历史事件:始建、更名、营造、重修、管理变迁、名人到访、居住创作、纪念事件",
"HONOR": "荣誉认定:景区评级、文保认定、官方荣誉、保护名录",
"CULTURAL": "文化活动:节庆、演艺、影视取景、展览、民俗活动",
"NATURAL": "自然生态:季节景观、生态观测、水文/地质/气候观测",
"OPERATIONAL": "运营维护:开闭园、维护停业、施工建设、运营调整",
"TRANSPORTATION": "交通事件:交通开通、线路变化、接驳调整"
},
"event_subtype": {
"FOUNDING": {"category": "HISTORICAL", "label": "始建"},
"RENAMING": {"category": "HISTORICAL", "label": "更名"},
"CONSTRUCTION": {"category": "HISTORICAL", "label": "建设营造"},
"REBUILD": {"category": "HISTORICAL", "label": "重修扩建"},
"MANAGEMENT_CHANGE": {"category": "HISTORICAL", "label": "管理变更"},
"FAMOUS_VISIT": {"category": "HISTORICAL", "label": "名人到访"},
"RESIDENCE_OR_CREATION": {"category": "HISTORICAL", "label": "居住创作"},
"MEMORIAL": {"category": "HISTORICAL", "label": "纪念事件"},
"AWARD": {"category": "HONOR", "label": "荣誉评定"},
"PROTECTION_LISTED": {"category": "HONOR", "label": "文保认定"},
"FESTIVAL": {"category": "CULTURAL", "label": "节庆活动"},
"PERFORMANCE": {"category": "CULTURAL", "label": "演艺活动"},
"FILMING": {"category": "CULTURAL", "label": "影视取景"},
"EXHIBITION": {"category": "CULTURAL", "label": "展览活动"},
"CULTURAL_ACTIVITY": {"category": "CULTURAL", "label": "文化活动"},
"SEASONAL": {"category": "NATURAL", "label": "季节景观"},
"NATURAL_OBSERVATION": {"category": "NATURAL", "label": "自然观测"},
"MAINTENANCE": {"category": "OPERATIONAL", "label": "维护停业"},
"OPENING_OR_CLOSURE": {"category": "OPERATIONAL", "label": "开放闭园"},
"TRANSPORT_CHANGE": {"category": "TRANSPORTATION", "label": "交通变更"}
},
"date_policy": {
"occurred_at_text": "保留原文时间例如“明崇祯十一年1638年”“1960年4月30日”",
"start_time_norm": "规范开始时间字符串,允许 YYYY / YYYY-MM / YYYY-MM-DD",
"end_time_norm": "规范结束时间字符串,区间事件才填写",
"date_granularity": "year|month|day|range|unknown",
"dynasty_century": "能从中文纪年或年份判断时填写 dynasty/century便于按朝代/世纪查询"
},
"details_policy": {
"AWARD": ["award_name", "awarded_by_name", "award_level", "batch"],
"FAMOUS_VISIT": ["visitor_names", "visit_purpose", "work_produced"],
"RESIDENCE_OR_CREATION": ["person_names", "work_produced", "residence_reason"],
"FILMING": ["work_title", "work_type", "director_names", "actor_names", "release_year"],
"FESTIVAL": ["recurrence", "organizer_names", "expected_visitors"],
"NATURAL_OBSERVATION": ["measured_metric", "measured_value", "measured_unit"],
"MAINTENANCE": ["maintenance_reason", "affected_areas", "fully_closed"]
},
"storage_policy": "底层统一存 Event 节点和属性;若图数据库支持多 Label可额外打 Event/HISTORICAL/FAMOUS_VISIT 等标签作为索引优化,但业务查询不得只依赖多 Label。"
},
"scenic_route_network_policy": {
"purpose": "解决游客在景区内部从一个景点/入口/服务点到另一个景点/入口/服务点的到达方式、耗时、费用和路线查询;逻辑模型为 ScenicArea -> Attraction -> RouteSegment/AttractionPath",
"not_for": "城市级附近 POI 召回。附近餐饮、酒店、医疗等仍由 H3/PostGIS/高德 POI 体系处理",
"node_rule": "路径端点统一使用 Attraction。入口、自然景观、文化点、码头、观景台、官方推荐服务点等用 Attraction.category/spot_type 区分,不再让 attraction_path.from/to 指向多个不同表",
"edge_rule": "不全量两两生成 SCENIC_PATH_TO只保存文本明确给出、官方导览图标注、运营人员实测或高频推荐需要的相邻/高价值路段",
"transport_mode_values": ["walk", "sightseeing_bus", "shuttle_boat", "cableway", "elevator", "escalator", "bike", "other"],
"cost_rule": "费用用 cost_fen 保存整数分;免费或门票内包含可 cost_fen=0 并标记 cost_in_ticket=true展示使用 cost_text",
"attraction_ticket_rule": "景点若存在单独收费,写入 extra_ticket_fen/extra_ticket_text若包含在景区大门票内extra_ticket_fen=0 且 ticket_included=true未知不要编造",
"schedule_rule": "步行路径通常不建 PathSchedule观光车、摆渡船、索道等有固定时刻或间隔发车时建立 PathScheduleschedule_type=fixed|interval",
"direction_rule": "上下坡、单行观光车、摆渡船航线等不对称路径必须 is_bidirectional=false并分别存正反向路段",
"season_rule": "季节性交通方式使用 season_start/season_end 保存开放月份",
"query_rule": "用户问 A 到 B 时,先做实体对齐,再在 SCENIC_PATH_TO/RouteSegment 路网中按 duration_min 或 distance_m 求最短路径;没有实测边时再回退到地图路径规划",
"route_segment_rule": "SCENIC_PATH_TO 用于快速查询RouteSegment 用于保存路线几何、分步说明、来源、实测时间、费用和维护版本"
},
"controlled_event_types": [
"ConstructionEvent",
"RenamingEvent",
"OpeningEvent",
"ManagementChangeEvent",
"ProtectionEvent",
"HonorAwardEvent",
"VisitEvent",
"HistoricalRecordEvent",
"MemorialEvent",
"EcologyEvent",
"DevelopmentEvent",
"TransportationEvent"
],
"controlled_concept_types": [
"NaturalEcology",
"HistoryCulture",
"RedTourism",
"ReligiousCulture",
"EthnicCulture",
"NightTour",
"ParentChild",
"OutdoorHiking",
"KarstLandform",
"WaterLandscape",
"AncientTown",
"UrbanLeisure",
"ScienceEducation"
],
"statement_predicate_policy": {
"allowed_core_predicates": [
"HAS_ADDRESS",
"HAS_OPENING_HOURS",
"HAS_TICKET_PRICE",
"HAS_SCENIC_LEVEL",
"HAS_AREA",
"HAS_CLIMATE",
"HAS_REPUTATION",
"HAS_SUGGESTED_DURATION",
"HAS_BEST_SEASON",
"HAS_ALTITUDE",
"HAS_PROTECTION_LEVEL",
"HAS_HONOR",
"FORMER_NAME",
"HAS_ALIAS",
"HAS_SOURCE_URL",
"HAS_PHOTO_URL",
"HAS_FARE",
"HAS_DURATION",
"HAS_ROUTE_DISTANCE",
"HAS_ENTRANCE",
"NEARBY_SERVICE",
"HAS_ROUTE_SEGMENT",
"USES_TRANSPORT_MODE",
"SEGMENT_STARTS_AT",
"SEGMENT_ENDS_AT",
"HAS_SCHEDULE",
"SCENIC_PATH_TO",
"HAS_SPECIALTY"
],
"new_predicate_rule": "LLM 可以提出 proposal 谓词,但必须进入 schema_gaps不得直接污染正式图谱关系名"
},
"extraction_output_contract": {
"required_top_level_keys": ["entities", "events", "concepts", "relations", "statements", "media_assets", "schema_gaps", "quality"],
"id_policy": "ID 使用稳定英文前缀,如 scenic_huaxi_park、sub_huaxi_baibu_bridge、evt_huaxi_1937_build入库前还要做 Entity Alignment",
"evidence_policy": "每个候选至少包含 evidence_quote若来自百度百科还要带 source_url 和 source_section"
}
}