Files
bxh/docs/kg-redesign/scenic_spot_schema_v0_2.json

375 lines
24 KiB
JSON
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"schema_id": "scenic_spot_schema_v0_3",
"schema_name": "通用景区知识图谱 Schema v0.3",
"status": "draft_for_extraction_test",
"target_coverage": "覆盖百度百科/高德 POI/人工材料中约 80% 的通用景区知识,差异化景区通过 SchemaGap 后续扩展",
"principles": [
"稳定高频事实作为节点属性,例如地址、开放时间、门票、等级、建议游玩时长、经纬度、来源 URL、照片 URL",
"实体之间的语义联系作为关系,例如景区位于区域、包含景点、关联历史事件、关联人物、附近景点、附近交通",
"不稳定、长尾、争议或新字段先进入 Statement/Attribute经过审核后再决定是否升级为正式字段或关系",
"普通展示照片回填到最具体实体的 photo_urls/cover_image_url导游图、导览图、线路图、全景图等功能型媒体作为 MediaAsset 独立保存 media_role并挂到景区主体的 guide_map_urls/route_map_urls/panorama_urls",
"所有抽取结果必须带 evidence_quote、source_name、source_url、source_section、confidence方便人工复核和溯源",
"空间能力不放在自然语言抽取里临时生成,必须在入图时统一写入 lat/lng/adcode/h3_r6-h3_r10/GeoCell 关系",
"Event 使用统一 Event 节点,必须同时保存 event_category/event_subtype/event_type/start_time_norm/end_time_norm/date_granularityevent_type 仅兼容旧系统,查询优先使用分类和时间字段"
],
"node_types": {
"ScenicArea": {
"description": "景区/旅游目的地主体,例如花溪公园、梵净山、青岩古镇、遵义会议会址",
"required": ["entity_id", "name", "entity_type", "source_name", "source_url"],
"properties": [
"entity_id",
"name",
"canonical_name",
"aliases",
"foreign_name",
"entity_type",
"scenic_category",
"scenic_level",
"description",
"reputation",
"country",
"province",
"city",
"district",
"address",
"location_text",
"lng",
"lat",
"adcode",
"h3_r6",
"h3_r7",
"h3_r8",
"h3_r9",
"h3_r10",
"climate",
"area_size",
"altitude",
"terrain",
"water_system",
"opening_hours",
"ticket_price",
"suggested_duration",
"best_season",
"famous_spots_text",
"nearby_attractions_text",
"service_features",
"cover_image_url",
"photo_urls",
"guide_map_urls",
"route_map_urls",
"panorama_urls",
"source_name",
"source_url",
"crawl_time",
"last_updated",
"confidence",
"review_status"
]
},
"Attraction": {
"description": "景区内部可游览、可搜索或可作为路径端点的景点节点,例如百步桥、麟山、金顶、蘑菇石、大门、码头、观景台。自然景观、文化点、入口等都通过 category/spot_type 区分,路径 from/to 统一指向 Attraction",
"required": ["entity_id", "name", "parent_name", "source_name", "source_url"],
"properties": [
"entity_id",
"name",
"aliases",
"parent_name",
"category",
"spot_type",
"description",
"location_text",
"lng",
"lat",
"open_time",
"close_time",
"ticket_note",
"extra_ticket_fen",
"extra_ticket_text",
"ticket_included",
"visit_duration_min",
"is_active",
"story",
"source_name",
"source_url",
"source_section",
"cover_image_url",
"photo_urls",
"evidence_quote",
"confidence",
"review_status"
]
},
"Area": {
"description": "行政区或地理区域,例如贵州省、贵阳市、花溪区、江口县",
"properties": ["entity_id", "name", "area_level", "adcode", "parent_area", "lng", "lat", "source_name", "source_url"]
},
"Facility": {
"description": "旅游服务与基础设施,例如游客中心、停车场、售票处、厕所、摆渡车。游客会作为景点游览或路径端点的桥、亭、山、洲、湖、旧居、纪念墓、入口和码头优先归为 Attraction",
"properties": ["entity_id", "name", "facility_type", "description", "lng", "lat", "source_name", "source_url", "cover_image_url", "photo_urls", "confidence"]
},
"TransitFacility": {
"description": "景区周边交通设施,例如公交站、地铁站、火车站、客运站、停车场入口",
"properties": ["entity_id", "name", "transit_type", "line_names", "distance_text", "lng", "lat", "source_name", "source_url", "confidence"]
},
"Specialty": {
"description": "地方风味、小吃、特产或体验项目,例如丝娃娃、洋芋粑、凉粉、凉面。用于回答景区附近有什么特色吃的",
"properties": ["entity_id", "name", "specialty_type", "description", "source_name", "source_url", "source_section", "evidence_quote", "confidence"]
},
"BusLine": {
"description": "公交、中巴、大巴、地铁等公共交通线路例如90路、89路、109路、201路。入图前优先与现有公交图谱 BusLine 按线路名和城市对齐",
"properties": ["line_id", "name", "line_name", "line_type", "origin_name", "destination_name", "stop_names", "source_name", "source_url", "source_section", "confidence"]
},
"Person": {
"description": "与景区历史、文化、建设、游历相关的人物",
"properties": ["entity_id", "name", "person_type", "description", "source_name", "source_url", "evidence_quote", "confidence"]
},
"Organization": {
"description": "管理单位、建设单位、保护机构、旅游公司、宗教组织等",
"properties": ["entity_id", "name", "org_type", "description", "source_name", "source_url", "confidence"]
},
"Event": {
"description": "景区相关历史、荣誉、文化、自然、运营、交通事件。统一用 Event 节点承载event_category 用于聚合event_subtype 用于精准查询details 保存子类专属字段",
"required": ["event_id", "name", "event_category", "event_subtype", "occurred_at_text", "source_name", "source_url"],
"properties": [
"event_id",
"name",
"event_category",
"event_subtype",
"event_type",
"occurred_at_text",
"occurred_at_norm",
"start_time_norm",
"end_time_norm",
"date_granularity",
"dynasty",
"century",
"description",
"location_name",
"participants",
"details",
"source_name",
"source_url",
"source_section",
"evidence_quote",
"confidence",
"review_status"
]
},
"Concept": {
"description": "可用于检索、推荐和解释的主题概念,例如历史文化、夜游、自然生态、喀斯特地貌、红色旅游、亲子游",
"properties": ["concept_id", "name", "concept_type", "description", "source_name", "source_url", "evidence_quote", "confidence"]
},
"RouteTemplate": {
"description": "景区内部游览路线或人工策划玩法。百科交通线路不要硬造 RouteTemplate/乘车点,应抽明确线路为 BusLine 并用 ACCESSIBLE_BY 关联景区",
"properties": ["route_id", "name", "route_type", "description", "origin_names", "destination_name", "line_names", "fare_text", "duration_text", "road_names", "stops", "source_name", "source_url", "source_section", "evidence_quote", "confidence"]
},
"TransportMode": {
"description": "景区内部到达方式字典,例如步行、观光车、摆渡船、索道、景区电梯。用于统一图标、默认是否免费和查询筛选",
"properties": ["mode_id", "code", "name", "icon", "typical_is_free", "description", "is_active"]
},
"RouteSegment": {
"description": "景区内部固定通行路段,例如从大门到百步桥、从百步桥到东舍。保存到达方式、时间、费用、季节性和实测来源,用于游客问答和路径规划,不全量两两生成",
"properties": ["segment_id", "name", "scenic_area_id", "from_entity_id", "to_entity_id", "transport_mode", "transport_mode_id", "duration_min", "duration_max", "duration_text", "distance_m", "cost_fen", "cost_text", "cost_in_ticket", "is_bidirectional", "season_start", "season_end", "weather_restrict", "sort_order", "route_steps", "route_geometry", "difficulty", "accessibility_note", "verified_by", "verified_at", "source_type", "source_name", "source_url", "source_section", "evidence_quote", "confidence", "review_status", "is_active"]
},
"PathSchedule": {
"description": "观光车、摆渡船、索道等非步行路径的班次或开放时段。步行路径通常不需要班次节点",
"properties": ["schedule_id", "path_id", "schedule_type", "interval_min", "first_at", "last_at", "capacity", "season_start", "season_end", "note", "source_name", "source_url", "confidence", "is_active"]
},
"MediaAsset": {
"description": "图片、视频或页面媒体资源。媒体不是景点实体本身,必须通过 owner_entity_id/HAS_MEDIA 挂到最具体实体;景点照片不能默认挂到景区主体",
"properties": ["media_id", "url", "media_type", "media_role", "caption", "owner_entity_id", "source_name", "source_url", "source_section", "crawl_time", "confidence"]
},
"SourceDocument": {
"description": "来源文档或网页,用于整体溯源",
"properties": ["source_id", "source_name", "source_url", "title", "crawl_time", "content_hash", "publisher"]
},
"Statement": {
"description": "候选事实层,承接长尾属性、待审核事实和 SchemaGap",
"properties": [
"statement_id",
"subject_id",
"predicate",
"object_id",
"object_value",
"object_type",
"source_name",
"source_url",
"source_section",
"evidence_quote",
"confidence",
"review_status"
]
}
},
"relation_types": {
"LOCATED_IN": {"source": ["ScenicArea", "Attraction", "Facility", "TransitFacility"], "target": ["Area", "ScenicArea"], "description": "实体位于行政区、片区或景区内部"},
"PART_OF": {"source": ["Attraction", "Facility"], "target": ["ScenicArea"], "description": "景点或设施属于某景区"},
"HAS_PART": {"source": ["ScenicArea"], "target": ["Attraction", "Facility"], "description": "景区包含景点、入口、自然景观、文化点、官方服务点或设施"},
"HAS_NATURAL_FEATURE": {"source": ["ScenicArea"], "target": ["Attraction"], "description": "景区具有自然地理景观,目标 Attraction.category=natural_feature"},
"HAS_CULTURAL_SITE": {"source": ["ScenicArea"], "target": ["Attraction"], "description": "景区具有文化/历史/宗教/纪念类点位,目标 Attraction.category=cultural_site"},
"HAS_FACILITY": {"source": ["ScenicArea"], "target": ["Facility"], "description": "景区具有服务设施或基础设施"},
"HAS_EVENT": {"source": ["ScenicArea", "Attraction", "Person", "Organization"], "target": ["Event"], "description": "主体关联某事件"},
"EVENT_AT": {"source": ["Event"], "target": ["ScenicArea", "Attraction", "Area"], "description": "事件发生于某地点"},
"INVOLVES": {"source": ["Event"], "target": ["Person", "Organization", "ScenicArea"], "description": "事件涉及人物、组织或景区"},
"PARTICIPATED_IN": {"source": ["Person", "Organization"], "target": ["Event"], "description": "人物或组织参与某事件,常用于名人到访、建设、节庆、保护、影视取景"},
"AWARDED_BY": {"source": ["Event"], "target": ["Organization"], "description": "荣誉/评级/文保认定事件由某机构颁发或公布"},
"ORGANIZED_BY": {"source": ["Event"], "target": ["Organization"], "description": "节庆、演艺、展览等文化事件由某组织举办"},
"PRECEDED": {"source": ["Event"], "target": ["Event"], "description": "事件发生时间早于另一事件,用于历史时间线和流程查询"},
"PART_OF_EVENT": {"source": ["Event"], "target": ["Event"], "description": "子事件属于大型事件或阶段性事件"},
"ASSOCIATED_WITH_PERSON": {"source": ["ScenicArea", "Attraction", "Event"], "target": ["Person"], "description": "景区/景点/事件与人物存在到访、题词、建设、纪念等关联"},
"MANAGED_BY": {"source": ["ScenicArea", "Attraction"], "target": ["Organization"], "description": "景区由某组织管理或运营"},
"HAS_CONCEPT": {"source": ["ScenicArea", "Attraction", "Event"], "target": ["Concept"], "description": "主体具有某主题概念"},
"HAS_ROUTE": {"source": ["ScenicArea"], "target": ["RouteTemplate"], "description": "景区有内部游览路线或人工策划玩法"},
"ROUTE_STARTS_AT": {"source": ["RouteTemplate"], "target": ["TransitFacility", "Area"], "description": "路线从某乘车点、站点或区域出发"},
"ROUTE_ENDS_AT": {"source": ["RouteTemplate"], "target": ["ScenicArea", "Attraction"], "description": "路线到达某景区或景点"},
"ROUTE_USES_LINE": {"source": ["RouteTemplate"], "target": ["BusLine"], "description": "路线可使用某公交/中巴/大巴/地铁线路"},
"ACCESSIBLE_BY": {"source": ["ScenicArea", "Attraction"], "target": ["BusLine"], "description": "景区可由某公交/中巴/大巴线路经过或到达,需与既有 BusLine 实体对齐"},
"STOPS_AT": {"source": ["BusLine"], "target": ["TransitFacility"], "description": "交通线路停靠某站点"},
"NEARBY_ATTRACTION": {"source": ["ScenicArea"], "target": ["ScenicArea", "Attraction"], "description": "附近或联动游览景点"},
"HAS_ENTRANCE": {"source": ["ScenicArea"], "target": ["Attraction"], "description": "景区具有某入口或门区,目标 Attraction.category=entrance_gate"},
"NEARBY_SERVICE": {"source": ["ScenicArea", "Attraction"], "target": ["Attraction"], "description": "景区、景点或入口附近存在官方材料明确推荐的服务地点,目标 Attraction.category=nearby_service"},
"HAS_ROUTE_SEGMENT": {"source": ["ScenicArea"], "target": ["RouteSegment"], "description": "景区拥有一段已知内部通行路段或候选路段,对应关系型模型 attraction_path"},
"USES_TRANSPORT_MODE": {"source": ["RouteSegment"], "target": ["TransportMode"], "description": "景区内部通行路段使用某种到达方式"},
"SEGMENT_STARTS_AT": {"source": ["RouteSegment"], "target": ["Attraction"], "description": "路线段起点,等价于 attraction_path.from_id"},
"SEGMENT_ENDS_AT": {"source": ["RouteSegment"], "target": ["Attraction"], "description": "路线段终点,等价于 attraction_path.to_id"},
"HAS_SCHEDULE": {"source": ["RouteSegment"], "target": ["PathSchedule"], "description": "观光车、摆渡船、索道等路段具有班次或开放时段"},
"SCENIC_PATH_TO": {"source": ["Attraction"], "target": ["Attraction"], "description": "景区内部景点之间可通行,对应 attraction_path 的快速查询边;关系属性保存 transport_mode/duration_min/duration_max/cost_fen/cost_text/is_bidirectional/segment_id。只存官方路线、实测或高价值相邻路段任意两点通过图最短路计算"},
"HAS_SPECIALTY": {"source": ["Attraction", "ScenicArea"], "target": ["Specialty"], "description": "地点具有某类地方小吃、特产或体验"},
"NEAR_TRANSIT": {"source": ["ScenicArea", "Attraction"], "target": ["TransitFacility"], "description": "景区附近交通设施"},
"HAS_MEDIA": {"source": ["ScenicArea", "Attraction", "Facility"], "target": ["MediaAsset"], "description": "实体关联照片、视频或媒体"},
"MENTIONED_IN": {"source": ["ScenicArea", "Attraction", "Event", "Concept", "Statement"], "target": ["SourceDocument"], "description": "事实或实体来自某来源文档"},
"SAME_AS": {"source": ["ScenicArea", "Attraction", "Area"], "target": ["ScenicArea", "Attraction", "Area"], "description": "实体对齐关系,避免同一景点重复入图"},
"IN_H3_R9": {"source": ["ScenicArea", "Attraction", "Facility", "TransitFacility"], "target": ["GeoCell"], "description": "空间索引关系,用于附近召回"}
},
"event_taxonomy": {
"event_category": {
"HISTORICAL": "历史事件:始建、更名、营造、重修、管理变迁、名人到访、居住创作、纪念事件",
"HONOR": "荣誉认定:景区评级、文保认定、官方荣誉、保护名录",
"CULTURAL": "文化活动:节庆、演艺、影视取景、展览、民俗活动",
"NATURAL": "自然生态:季节景观、生态观测、水文/地质/气候观测",
"OPERATIONAL": "运营维护:开闭园、维护停业、施工建设、运营调整",
"TRANSPORTATION": "交通事件:交通开通、线路变化、接驳调整"
},
"event_subtype": {
"FOUNDING": {"category": "HISTORICAL", "label": "始建"},
"RENAMING": {"category": "HISTORICAL", "label": "更名"},
"CONSTRUCTION": {"category": "HISTORICAL", "label": "建设营造"},
"REBUILD": {"category": "HISTORICAL", "label": "重修扩建"},
"MANAGEMENT_CHANGE": {"category": "HISTORICAL", "label": "管理变更"},
"FAMOUS_VISIT": {"category": "HISTORICAL", "label": "名人到访"},
"RESIDENCE_OR_CREATION": {"category": "HISTORICAL", "label": "居住创作"},
"MEMORIAL": {"category": "HISTORICAL", "label": "纪念事件"},
"AWARD": {"category": "HONOR", "label": "荣誉评定"},
"PROTECTION_LISTED": {"category": "HONOR", "label": "文保认定"},
"FESTIVAL": {"category": "CULTURAL", "label": "节庆活动"},
"PERFORMANCE": {"category": "CULTURAL", "label": "演艺活动"},
"FILMING": {"category": "CULTURAL", "label": "影视取景"},
"EXHIBITION": {"category": "CULTURAL", "label": "展览活动"},
"CULTURAL_ACTIVITY": {"category": "CULTURAL", "label": "文化活动"},
"SEASONAL": {"category": "NATURAL", "label": "季节景观"},
"NATURAL_OBSERVATION": {"category": "NATURAL", "label": "自然观测"},
"MAINTENANCE": {"category": "OPERATIONAL", "label": "维护停业"},
"OPENING_OR_CLOSURE": {"category": "OPERATIONAL", "label": "开放闭园"},
"TRANSPORT_CHANGE": {"category": "TRANSPORTATION", "label": "交通变更"}
},
"date_policy": {
"occurred_at_text": "保留原文时间例如“明崇祯十一年1638年”“1960年4月30日”",
"start_time_norm": "规范开始时间字符串,允许 YYYY / YYYY-MM / YYYY-MM-DD",
"end_time_norm": "规范结束时间字符串,区间事件才填写",
"date_granularity": "year|month|day|range|unknown",
"dynasty_century": "能从中文纪年或年份判断时填写 dynasty/century便于按朝代/世纪查询"
},
"details_policy": {
"AWARD": ["award_name", "awarded_by_name", "award_level", "batch"],
"FAMOUS_VISIT": ["visitor_names", "visit_purpose", "work_produced"],
"RESIDENCE_OR_CREATION": ["person_names", "work_produced", "residence_reason"],
"FILMING": ["work_title", "work_type", "director_names", "actor_names", "release_year"],
"FESTIVAL": ["recurrence", "organizer_names", "expected_visitors"],
"NATURAL_OBSERVATION": ["measured_metric", "measured_value", "measured_unit"],
"MAINTENANCE": ["maintenance_reason", "affected_areas", "fully_closed"]
},
"storage_policy": "底层统一存 Event 节点和属性;若图数据库支持多 Label可额外打 Event/HISTORICAL/FAMOUS_VISIT 等标签作为索引优化,但业务查询不得只依赖多 Label。"
},
"scenic_route_network_policy": {
"purpose": "解决游客在景区内部从一个景点/入口/服务点到另一个景点/入口/服务点的到达方式、耗时、费用和路线查询;逻辑模型为 ScenicArea -> Attraction -> RouteSegment/AttractionPath",
"not_for": "城市级附近 POI 召回。附近餐饮、酒店、医疗等仍由 H3/PostGIS/高德 POI 体系处理",
"node_rule": "路径端点统一使用 Attraction。入口、自然景观、文化点、码头、观景台、官方推荐服务点等用 Attraction.category/spot_type 区分,不再让 attraction_path.from/to 指向多个不同表",
"edge_rule": "不全量两两生成 SCENIC_PATH_TO只保存文本明确给出、官方导览图标注、运营人员实测或高频推荐需要的相邻/高价值路段",
"transport_mode_values": ["walk", "sightseeing_bus", "shuttle_boat", "cableway", "elevator", "escalator", "bike", "other"],
"cost_rule": "费用用 cost_fen 保存整数分;免费或门票内包含可 cost_fen=0 并标记 cost_in_ticket=true展示使用 cost_text",
"attraction_ticket_rule": "景点若存在单独收费,写入 extra_ticket_fen/extra_ticket_text若包含在景区大门票内extra_ticket_fen=0 且 ticket_included=true未知不要编造",
"schedule_rule": "步行路径通常不建 PathSchedule观光车、摆渡船、索道等有固定时刻或间隔发车时建立 PathScheduleschedule_type=fixed|interval",
"direction_rule": "上下坡、单行观光车、摆渡船航线等不对称路径必须 is_bidirectional=false并分别存正反向路段",
"season_rule": "季节性交通方式使用 season_start/season_end 保存开放月份",
"query_rule": "用户问 A 到 B 时,先做实体对齐,再在 SCENIC_PATH_TO/RouteSegment 路网中按 duration_min 或 distance_m 求最短路径;没有实测边时再回退到地图路径规划",
"route_segment_rule": "SCENIC_PATH_TO 用于快速查询RouteSegment 用于保存路线几何、分步说明、来源、实测时间、费用和维护版本"
},
"controlled_event_types": [
"ConstructionEvent",
"RenamingEvent",
"OpeningEvent",
"ManagementChangeEvent",
"ProtectionEvent",
"HonorAwardEvent",
"VisitEvent",
"HistoricalRecordEvent",
"MemorialEvent",
"EcologyEvent",
"DevelopmentEvent",
"TransportationEvent"
],
"controlled_concept_types": [
"NaturalEcology",
"HistoryCulture",
"RedTourism",
"ReligiousCulture",
"EthnicCulture",
"NightTour",
"ParentChild",
"OutdoorHiking",
"KarstLandform",
"WaterLandscape",
"AncientTown",
"UrbanLeisure",
"ScienceEducation"
],
"statement_predicate_policy": {
"allowed_core_predicates": [
"HAS_ADDRESS",
"HAS_OPENING_HOURS",
"HAS_TICKET_PRICE",
"HAS_SCENIC_LEVEL",
"HAS_AREA",
"HAS_CLIMATE",
"HAS_REPUTATION",
"HAS_SUGGESTED_DURATION",
"HAS_BEST_SEASON",
"HAS_ALTITUDE",
"HAS_PROTECTION_LEVEL",
"HAS_HONOR",
"FORMER_NAME",
"HAS_ALIAS",
"HAS_SOURCE_URL",
"HAS_PHOTO_URL",
"HAS_FARE",
"HAS_DURATION",
"HAS_ROUTE_DISTANCE",
"HAS_ENTRANCE",
"NEARBY_SERVICE",
"HAS_ROUTE_SEGMENT",
"USES_TRANSPORT_MODE",
"SEGMENT_STARTS_AT",
"SEGMENT_ENDS_AT",
"HAS_SCHEDULE",
"SCENIC_PATH_TO",
"HAS_SPECIALTY"
],
"new_predicate_rule": "LLM 可以提出 proposal 谓词,但必须进入 schema_gaps不得直接污染正式图谱关系名"
},
"extraction_output_contract": {
"required_top_level_keys": ["entities", "events", "concepts", "relations", "statements", "media_assets", "schema_gaps", "quality"],
"id_policy": "ID 使用稳定英文前缀,如 scenic_huaxi_park、sub_huaxi_baibu_bridge、evt_huaxi_1937_build入库前还要做 Entity Alignment",
"evidence_policy": "每个候选至少包含 evidence_quote若来自百度百科还要带 source_url 和 source_section"
}
}