Files
bxh/app/schemas/kg_extraction_v1.schema.json

179 lines
7.2 KiB
JSON
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "kg_extraction_v1",
"title": "Unified KG Extraction Output",
"description": "统一知识抽取输出。每次抽取必须同时考虑 Entity、Event、Concept、Relation并保留证据链。",
"type": "object",
"additionalProperties": false,
"required": [
"entities",
"events",
"concepts",
"relations",
"statements",
"schema_proposals",
"evidence_links"
],
"properties": {
"entities": {
"type": "array",
"items": { "$ref": "#/$defs/entity" }
},
"events": {
"type": "array",
"items": { "$ref": "#/$defs/event" }
},
"concepts": {
"type": "array",
"items": { "$ref": "#/$defs/concept" }
},
"relations": {
"type": "array",
"items": { "$ref": "#/$defs/relation" }
},
"statements": {
"type": "array",
"items": { "$ref": "#/$defs/statement" }
},
"schema_proposals": {
"type": "array",
"items": { "$ref": "#/$defs/schema_proposal" }
},
"evidence_links": {
"type": "array",
"items": { "$ref": "#/$defs/evidence_link" }
}
},
"$defs": {
"confidence": {
"type": "number",
"minimum": 0,
"maximum": 1
},
"source_span": {
"type": "object",
"additionalProperties": false,
"properties": {
"evidence_id": { "type": ["string", "integer"] },
"quote": { "type": "string" },
"start_char": { "type": ["integer", "null"], "minimum": 0 },
"end_char": { "type": ["integer", "null"], "minimum": 0 }
}
},
"entity": {
"type": "object",
"additionalProperties": false,
"required": ["temp_id", "name", "entity_type", "confidence"],
"properties": {
"temp_id": { "type": "string", "description": "本次抽取内的临时 ID例如 ent_1" },
"name": { "type": "string" },
"entity_type": { "type": "string", "examples": ["Place", "Area", "Organization", "Person", "Facility"] },
"aliases": { "type": "array", "items": { "type": "string" }, "default": [] },
"description": { "type": "string", "default": "" },
"attributes": { "type": "object", "additionalProperties": true, "default": {} },
"spatial": {
"type": "object",
"additionalProperties": false,
"properties": {
"lng": { "type": ["number", "null"] },
"lat": { "type": ["number", "null"] },
"address": { "type": ["string", "null"] },
"adcode": { "type": ["string", "null"] },
"h3": { "type": "object", "additionalProperties": { "type": "string" } }
},
"default": {}
},
"source_spans": { "type": "array", "items": { "$ref": "#/$defs/source_span" }, "default": [] },
"confidence": { "$ref": "#/$defs/confidence" }
}
},
"event": {
"type": "object",
"additionalProperties": false,
"required": ["temp_id", "title", "event_type", "confidence"],
"properties": {
"temp_id": { "type": "string", "description": "本次抽取内的临时 ID例如 evt_1" },
"title": { "type": "string" },
"event_type": { "type": "string", "examples": ["OpeningEvent", "HistoricalEvent", "FestivalEvent"] },
"time_text": { "type": "string", "default": "", "description": "事件发生时间原文,发布入图时映射为 event_date/event_time" },
"time_norm": { "type": "string", "default": "", "description": "事件时间规范值,发布入图时映射为 event_date_norm例如 1937 或 1960-04-30" },
"description": { "type": "string", "default": "" },
"participants": { "type": "array", "items": { "type": "string" }, "default": [] },
"location_ref": { "type": ["string", "null"], "description": "可指向 entity.temp_id" },
"source_spans": { "type": "array", "items": { "$ref": "#/$defs/source_span" }, "default": [] },
"confidence": { "$ref": "#/$defs/confidence" }
}
},
"concept": {
"type": "object",
"additionalProperties": false,
"required": ["temp_id", "name", "concept_type", "confidence"],
"properties": {
"temp_id": { "type": "string", "description": "本次抽取内的临时 ID例如 cpt_1" },
"name": { "type": "string" },
"concept_type": { "type": "string", "examples": ["Scene", "Category", "Topic", "Audience", "Experience"] },
"description": { "type": "string", "default": "" },
"parent_concepts": { "type": "array", "items": { "type": "string" }, "default": [] },
"source_spans": { "type": "array", "items": { "$ref": "#/$defs/source_span" }, "default": [] },
"confidence": { "$ref": "#/$defs/confidence" }
}
},
"relation": {
"type": "object",
"additionalProperties": false,
"required": ["relation_type", "source_type", "target_type", "confidence"],
"properties": {
"relation_type": { "type": "string", "examples": ["LOCATED_IN", "HAS_CONCEPT", "HAS_EVENT", "OPERATED_BY"] },
"source_type": { "type": "string" },
"target_type": { "type": "string" },
"description": { "type": "string", "default": "" },
"inverse_relation": { "type": ["string", "null"] },
"confidence": { "$ref": "#/$defs/confidence" }
}
},
"statement": {
"type": "object",
"additionalProperties": false,
"required": ["subject_ref", "predicate", "object_ref", "object_kind", "confidence"],
"properties": {
"subject_ref": { "type": "string", "description": "entity/event/concept temp_id 或已知 natural_key" },
"predicate": { "type": "string" },
"object_ref": { "type": "string" },
"object_kind": { "type": "string", "enum": ["entity", "event", "concept", "literal"] },
"qualifiers": { "type": "object", "additionalProperties": true, "default": {} },
"source_spans": { "type": "array", "items": { "$ref": "#/$defs/source_span" }, "default": [] },
"confidence": { "$ref": "#/$defs/confidence" }
}
},
"schema_proposal": {
"type": "object",
"additionalProperties": false,
"required": ["proposal_type", "name", "reason", "confidence"],
"properties": {
"proposal_type": {
"type": "string",
"enum": ["entity_type", "event_type", "concept_type", "relation_type", "field"]
},
"name": { "type": "string" },
"source_type": { "type": ["string", "null"] },
"target_type": { "type": ["string", "null"] },
"value_type": { "type": ["string", "null"], "examples": ["string", "number", "date", "boolean", "geo"] },
"reason": { "type": "string" },
"examples": { "type": "array", "items": { "type": "string" }, "default": [] },
"confidence": { "$ref": "#/$defs/confidence" }
}
},
"evidence_link": {
"type": "object",
"additionalProperties": false,
"required": ["target_ref", "evidence_id", "support_type"],
"properties": {
"target_ref": { "type": "string" },
"evidence_id": { "type": ["string", "integer"] },
"support_type": { "type": "string", "enum": ["supports", "contradicts", "mentions"] },
"quote": { "type": "string", "default": "" }
}
}
}
}