{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "kg_extraction_v1",
  "title": "Unified KG Extraction Output",
  "description": "统一知识抽取输出。每次抽取必须同时考虑 Entity、Event、Concept、Relation,并保留证据链。",
  "type": "object",
  "additionalProperties": false,
  "required": [
    "entities",
    "events",
    "concepts",
    "relations",
    "statements",
    "schema_proposals",
    "evidence_links"
  ],
  "properties": {
    "entities": {
      "type": "array",
      "items": { "$ref": "#/$defs/entity" }
    },
    "events": {
      "type": "array",
      "items": { "$ref": "#/$defs/event" }
    },
    "concepts": {
      "type": "array",
      "items": { "$ref": "#/$defs/concept" }
    },
    "relations": {
      "type": "array",
      "items": { "$ref": "#/$defs/relation" }
    },
    "statements": {
      "type": "array",
      "items": { "$ref": "#/$defs/statement" }
    },
    "schema_proposals": {
      "type": "array",
      "items": { "$ref": "#/$defs/schema_proposal" }
    },
    "evidence_links": {
      "type": "array",
      "items": { "$ref": "#/$defs/evidence_link" }
    }
  },
  "$defs": {
    "confidence": {
      "type": "number",
      "minimum": 0,
      "maximum": 1
    },
    "source_span": {
      "type": "object",
      "additionalProperties": false,
      "properties": {
        "evidence_id": { "type": ["string", "integer"] },
        "quote": { "type": "string" },
        "start_char": { "type": ["integer", "null"], "minimum": 0 },
        "end_char": { "type": ["integer", "null"], "minimum": 0 }
      }
    },
    "entity": {
      "type": "object",
      "additionalProperties": false,
      "required": ["temp_id", "name", "entity_type", "confidence"],
      "properties": {
        "temp_id": { "type": "string", "description": "本次抽取内的临时 ID,例如 ent_1" },
        "name": { "type": "string" },
        "entity_type": { "type": "string", "examples": ["Place", "Area", "Organization", "Person", "Facility"] },
        "aliases": { "type": "array", "items": { "type": "string" }, "default": [] },
        "description": { "type": "string", "default": "" },
        "attributes": { "type": "object", "additionalProperties": true, "default": {} },
        "spatial": {
          "type": "object",
          "additionalProperties": false,
          "properties": {
            "lng": { "type": ["number", "null"] },
            "lat": { "type": ["number", "null"] },
            "address": { "type": ["string", "null"] },
            "adcode": { "type": ["string", "null"] },
            "h3": { "type": "object", "additionalProperties": { "type": "string" } }
          },
          "default": {}
        },
        "source_spans": { "type": "array", "items": { "$ref": "#/$defs/source_span" }, "default": [] },
        "confidence": { "$ref": "#/$defs/confidence" }
      }
    },
    "event": {
      "type": "object",
      "additionalProperties": false,
      "required": ["temp_id", "title", "event_type", "confidence"],
      "properties": {
        "temp_id": { "type": "string", "description": "本次抽取内的临时 ID,例如 evt_1" },
        "title": { "type": "string" },
        "event_type": { "type": "string", "examples": ["OpeningEvent", "HistoricalEvent", "FestivalEvent"] },
        "time_text": { "type": "string", "default": "", "description": "事件发生时间原文,发布入图时映射为 event_date/event_time" },
        "time_norm": { "type": "string", "default": "", "description": "事件时间规范值,发布入图时映射为 event_date_norm,例如 1937 或 1960-04-30" },
        "description": { "type": "string", "default": "" },
        "participants": { "type": "array", "items": { "type": "string" }, "default": [] },
        "location_ref": { "type": ["string", "null"], "description": "可指向 entity.temp_id" },
        "source_spans": { "type": "array", "items": { "$ref": "#/$defs/source_span" }, "default": [] },
        "confidence": { "$ref": "#/$defs/confidence" }
      }
    },
    "concept": {
      "type": "object",
      "additionalProperties": false,
      "required": ["temp_id", "name", "concept_type", "confidence"],
      "properties": {
        "temp_id": { "type": "string", "description": "本次抽取内的临时 ID,例如 cpt_1" },
        "name": { "type": "string" },
        "concept_type": { "type": "string", "examples": ["Scene", "Category", "Topic", "Audience", "Experience"] },
        "description": { "type": "string", "default": "" },
        "parent_concepts": { "type": "array", "items": { "type": "string" }, "default": [] },
        "source_spans": { "type": "array", "items": { "$ref": "#/$defs/source_span" }, "default": [] },
        "confidence": { "$ref": "#/$defs/confidence" }
      }
    },
    "relation": {
      "type": "object",
      "additionalProperties": false,
      "required": ["relation_type", "source_type", "target_type", "confidence"],
      "properties": {
        "relation_type": { "type": "string", "examples": ["LOCATED_IN", "HAS_CONCEPT", "HAS_EVENT", "OPERATED_BY"] },
        "source_type": { "type": "string" },
        "target_type": { "type": "string" },
        "description": { "type": "string", "default": "" },
        "inverse_relation": { "type": ["string", "null"] },
        "confidence": { "$ref": "#/$defs/confidence" }
      }
    },
    "statement": {
      "type": "object",
      "additionalProperties": false,
      "required": ["subject_ref", "predicate", "object_ref", "object_kind", "confidence"],
      "properties": {
        "subject_ref": { "type": "string", "description": "entity/event/concept temp_id 或已知 natural_key" },
        "predicate": { "type": "string" },
        "object_ref": { "type": "string" },
        "object_kind": { "type": "string", "enum": ["entity", "event", "concept", "literal"] },
        "qualifiers": { "type": "object", "additionalProperties": true, "default": {} },
        "source_spans": { "type": "array", "items": { "$ref": "#/$defs/source_span" }, "default": [] },
        "confidence": { "$ref": "#/$defs/confidence" }
      }
    },
    "schema_proposal": {
      "type": "object",
      "additionalProperties": false,
      "required": ["proposal_type", "name", "reason", "confidence"],
      "properties": {
        "proposal_type": {
          "type": "string",
          "enum": ["entity_type", "event_type", "concept_type", "relation_type", "field"]
        },
        "name": { "type": "string" },
        "source_type": { "type": ["string", "null"] },
        "target_type": { "type": ["string", "null"] },
        "value_type": { "type": ["string", "null"], "examples": ["string", "number", "date", "boolean", "geo"] },
        "reason": { "type": "string" },
        "examples": { "type": "array", "items": { "type": "string" }, "default": [] },
        "confidence": { "$ref": "#/$defs/confidence" }
      }
    },
    "evidence_link": {
      "type": "object",
      "additionalProperties": false,
      "required": ["target_ref", "evidence_id", "support_type"],
      "properties": {
        "target_ref": { "type": "string" },
        "evidence_id": { "type": ["string", "integer"] },
        "support_type": { "type": "string", "enum": ["supports", "contradicts", "mentions"] },
        "quote": { "type": "string", "default": "" }
      }
    }
  }
}