Initial travel knowledge graph release
This commit is contained in:
141
app/agents/schema_lint.py
Normal file
141
app/agents/schema_lint.py
Normal file
@@ -0,0 +1,141 @@
|
||||
"""Phase 1 — Schema Lint Agent.
|
||||
|
||||
Validates an ontology schema for completeness, consistency, and best practices.
|
||||
"""
|
||||
from __future__ import annotations

import inspect
import json
import logging
from dataclasses import dataclass, field

from app.config import settings
from app.llm_client import LlmClient
|
||||
|
||||
# System prompt (written in Chinese) sent as the `system` message of the LLM
# call in lint_schema. It casts the model as a knowledge-graph schema auditor,
# lists five check dimensions (type coverage, field completeness, relation
# connectivity, field-naming consistency, required-field sanity), defines the
# three severities (error / warning / info) and demands JSON-only output.
# The text is runtime data: do not reword it without re-validating the output.
SYSTEM_PROMPT = """你是一个知识图谱 Schema 审计员。给一个 ontology schema,检查它的问题。输出 JSON。
|
||||
|
||||
检查维度:
|
||||
1. 类型覆盖率 — 常用实体类型(POI, Restaurant, Hotel, Attraction)是否都有
|
||||
2. 字段完整性 — 每个实体类型的核心字段是否齐备
|
||||
3. 关系连通性 — 实体类型之间是否有合理的关系类型
|
||||
4. 命名字段一致性 — 同类字段在不同实体类型下命名是否一致
|
||||
5. 必填字段合理性 — 标记为 required 的字段是否确实必要
|
||||
|
||||
问题严重度:
|
||||
- error: 会导致图谱不可用
|
||||
- warning: 建议修复
|
||||
- info: 可选优化
|
||||
|
||||
只输出 JSON。"""
|
||||
|
||||
# User-message template for the LLM call in lint_schema. It is rendered with
# str.format(schema_json=...), so `{schema_json}` is the only placeholder and
# every literal brace of the example JSON is doubled (`{{` / `}}`).
# The example describes the reply shape the parser reads: an `issues` list
# (severity, code, target, message, suggestion), a `score` and a `summary`.
USER_TEMPLATE = """Ontology Schema:
|
||||
{schema_json}
|
||||
|
||||
输出 schema:
|
||||
{{
|
||||
"issues": [
|
||||
{{
|
||||
"severity": "error|warning|info",
|
||||
"code": "missing_entity_type|missing_field|orphan_type|naming_inconsistency",
|
||||
"target": "entity_type.field_name",
|
||||
"message": "中文描述",
|
||||
"suggestion": "修复建议"
|
||||
}}
|
||||
],
|
||||
"score": 0~100,
|
||||
"summary": "一句话总结"
|
||||
}}"""
|
||||
|
||||
|
||||
@dataclass
class LintIssue:
    """A single finding produced while linting an ontology schema."""

    # Severity label; the values used in this module are
    # "error", "warning" and "info".
    severity: str
    # Machine-readable category, e.g. "missing_entity_type" or "missing_field".
    code: str
    # What the finding points at: an entity type name or
    # an "entity_type.field_name" path.
    target: str
    # Human-readable description of the problem.
    message: str
    # Optional remediation hint; empty string when none is provided.
    suggestion: str = ""
|
||||
|
||||
|
||||
@dataclass
class SchemaLintResult:
    """Aggregate outcome of linting one ontology schema."""

    # Individual findings; every instance gets its own fresh list.
    issues: list[LintIssue] = field(default_factory=list)
    # Overall quality score; defaults to a perfect 100.
    score: int = 100
    # Short summary of the lint outcome; empty by default.
    summary: str = ""
|
||||
|
||||
|
||||
def _rule_based_lint(schema_dict: dict) -> SchemaLintResult:
    """Lint a schema with fixed local rules (fallback when no LLM result is used).

    Two rules are applied:

    * each of ``POI``, ``Restaurant``, ``Hotel`` and ``Attraction`` should be
      declared; every missing one yields a ``warning`` with code
      ``missing_entity_type``;
    * each declared entity type must have a field named ``name``; every
      offender yields an ``error`` with code ``missing_field``.

    Args:
        schema_dict: Ontology schema. Only ``entity_types`` is read: a list of
            dicts, each read for ``entity_type`` (its name) and ``fields``
            (a list of dicts read for ``field_name``). A missing or null
            ``entity_types`` / ``fields`` is treated as empty.

    Returns:
        A ``SchemaLintResult`` whose score is ``100 - 10 * len(issues)``,
        floored at 0, with missing-type warnings listed first in
        alphabetical order.
    """
    issues: list[LintIssue] = []
    entity_types = schema_dict.get("entity_types") or []

    # Rule 1: required entity types.
    declared = {et.get("entity_type", "") for et in entity_types}
    required_types = {"POI", "Restaurant", "Hotel", "Attraction"}
    # sorted(): set iteration order depends on string hashing, which is
    # randomized per process, so the issue order would otherwise vary.
    for missing_type in sorted(required_types - declared):
        issues.append(LintIssue(
            severity="warning",
            code="missing_entity_type",
            target=missing_type,
            message=f"缺少常用实体类型: {missing_type}",
            suggestion=f"建议添加 {missing_type} 实体类型定义",
        ))

    # Rule 2: every entity type needs a `name` field.
    for et in entity_types:
        type_name = et.get("entity_type")
        # `or []` mirrors the entity_types guard: an explicit null `fields`
        # must not crash the fallback linter.
        field_names = {f.get("field_name", "") for f in et.get("fields") or []}
        if "name" in field_names:
            continue
        issues.append(LintIssue(
            severity="error",
            code="missing_field",
            target=f"{type_name}.name",
            message=f"实体类型 {type_name} 缺少必填字段: name",
            suggestion="所有实体类型必须包含 name 字段",
        ))

    # Each issue costs 10 points; never go below zero.
    score = max(0, 100 - len(issues) * 10)
    return SchemaLintResult(
        issues=issues,
        score=score,
        summary=f"发现 {len(issues)} 个问题" if issues else "Schema 检查通过",
    )
|
||||
|
||||
|
||||
async def lint_schema(
    schema_dict: dict,
    llm: LlmClient | None = None,
) -> SchemaLintResult:
    """Lint an ontology schema, preferring the LLM and falling back to local rules.

    The LLM path is used when a client is given (or can be built from
    settings because ``settings.llm_api_key`` is set) and reports itself
    available. Any failure on that path is logged and the rule-based lint
    is returned instead, so this function does not raise because of the LLM.

    Args:
        schema_dict: The ontology schema as a dict (must be JSON-serialisable
            for the LLM path).
        llm: Optional LLM client; when ``None`` one is created from settings
            if an API key is configured.

    Returns:
        A ``SchemaLintResult``. On the LLM path, severities are normalised to
        ``error`` / ``warning`` / ``info`` (unknown values become ``warning``)
        and the score is coerced to an int clamped to 0..100 (default 80).
    """
    log = logging.getLogger(__name__)

    if llm is None:
        llm = LlmClient.from_settings() if settings.llm_api_key else None

    if llm and llm.available():
        try:
            schema_json = json.dumps(schema_dict, ensure_ascii=False, indent=2)
            result = llm.chat_json(
                system=SYSTEM_PROMPT,
                user=USER_TEMPLATE.format(schema_json=schema_json),
            )
            # NOTE(review): whether chat_json is sync or async is not visible
            # from this module; awaiting an awaitable result supports both.
            # Confirm against LlmClient.
            if inspect.isawaitable(result):
                result = await result
            if not isinstance(result, dict):
                raise TypeError(
                    f"expected a JSON object from the LLM, got {type(result).__name__}"
                )

            issues = []
            # `or []` tolerates an explicit `"issues": null` in the reply.
            for item in result.get("issues") or []:
                if not isinstance(item, dict):
                    continue  # ignore malformed entries instead of discarding the reply
                severity = str(item.get("severity", "warning")).strip().lower()
                if severity not in ("error", "warning", "info"):
                    severity = "warning"
                issues.append(LintIssue(
                    severity=severity,
                    code=item.get("code", ""),
                    target=item.get("target", ""),
                    message=item.get("message", ""),
                    suggestion=item.get("suggestion", ""),
                ))

            # The model may return the score as a string/float or out of range.
            try:
                score = int(result.get("score", 80))
            except (TypeError, ValueError, OverflowError):
                score = 80
            score = max(0, min(100, score))

            return SchemaLintResult(
                issues=issues,
                score=score,
                summary=result.get("summary", ""),
            )
        except Exception:
            # Deliberate best-effort boundary: keep the fallback, but leave a
            # trace so LLM failures are not invisible.
            log.warning(
                "LLM schema lint failed; falling back to rule-based lint",
                exc_info=True,
            )

    return _rule_based_lint(schema_dict)
|
||||
Reference in New Issue
Block a user