Initial travel knowledge graph release

This commit is contained in:
2026-06-09 09:56:26 +08:00
commit 5f061295d8
402 changed files with 103877 additions and 0 deletions

141
app/agents/schema_lint.py Normal file
View File

@@ -0,0 +1,141 @@
"""Phase 1 — Schema Lint Agent.
Validates an ontology schema for completeness, consistency, and best practices.
"""
from __future__ import annotations

import inspect
import json
import logging
from dataclasses import dataclass, field

from app.config import settings
from app.llm_client import LlmClient
# System prompt passed as ``system=`` to ``llm.chat_json`` in ``lint_schema``.
# Written in Chinese: it tells the model to act as a knowledge-graph schema
# auditor, lists five check dimensions (type coverage, field completeness,
# relation connectivity, field-naming consistency, required-field sanity),
# defines the three severities (error / warning / info) and demands JSON-only
# output. The text is runtime data sent to the model — do not reformat it.
SYSTEM_PROMPT = """你是一个知识图谱 Schema 审计员。给一个 ontology schema检查它的问题。输出 JSON。
检查维度:
1. 类型覆盖率 — 常用实体类型POI, Restaurant, Hotel, Attraction是否都有
2. 字段完整性 — 每个实体类型的核心字段是否齐备
3. 关系连通性 — 实体类型之间是否有合理的关系类型
4. 命名字段一致性 — 同类字段在不同实体类型下命名是否一致
5. 必填字段合理性 — 标记为 required 的字段是否确实必要
问题严重度:
- error: 会导致图谱不可用
- warning: 建议修复
- info: 可选优化
只输出 JSON。"""
# User-message template filled in by ``lint_schema`` via ``str.format``.
# ``{schema_json}`` is the only placeholder; every other brace is doubled
# (``{{`` / ``}}``) so that ``format`` emits it as a literal JSON brace.
# The second half shows the model the response shape that ``lint_schema``
# reads back: an ``issues`` list plus ``score`` and ``summary``.
USER_TEMPLATE = """Ontology Schema
{schema_json}
输出 schema
{{
"issues": [
{{
"severity": "error|warning|info",
"code": "missing_entity_type|missing_field|orphan_type|naming_inconsistency",
"target": "entity_type.field_name",
"message": "中文描述",
"suggestion": "修复建议"
}}
],
"score": 0~100,
"summary": "一句话总结"
}}"""
@dataclass
class LintIssue:
    """A single finding reported against an ontology schema.

    ``suggestion`` is the only optional field and defaults to an empty string.
    """

    # One of: error | warning | info
    severity: str
    code: str
    target: str
    message: str
    suggestion: str = ""
@dataclass
class SchemaLintResult:
    """Outcome of one schema lint run: the findings, a score and a summary.

    Constructed with no arguments it represents a clean result: no issues,
    a score of 100 and an empty summary.
    """

    # Each instance gets its own list (default_factory), never a shared one.
    issues: list[LintIssue] = field(default_factory=list)
    # Defaults to the top score.
    score: int = 100
    summary: str = ""
def _rule_based_lint(schema_dict: dict) -> SchemaLintResult:
    """Lint a schema with fixed local rules (the fallback when no LLM is used).

    Two rules are applied:

    * each of the common entity types (POI, Restaurant, Hotel, Attraction)
      that is absent from ``entity_types`` produces a ``missing_entity_type``
      warning;
    * each entity type whose ``fields`` contain no ``name`` field produces a
      ``missing_field`` error.

    The score starts at 100 and drops 10 points per issue, floored at 0.

    Args:
        schema_dict: Ontology schema. The keys read are ``entity_types`` (a
            list of dicts), and on each entry ``entity_type`` and ``fields``
            (a list of dicts carrying ``field_name``). A null schema, a null
            ``entity_types`` or a null ``fields`` is treated as empty.

    Returns:
        A ``SchemaLintResult`` holding the issues, the score and a one-line
        summary.
    """
    issues: list[LintIssue] = []
    entity_types = (schema_dict or {}).get("entity_types") or []

    # Rule 1: common entity types that the schema does not declare.
    declared = {et.get("entity_type", "") for et in entity_types}
    required_types = {"POI", "Restaurant", "Hotel", "Attraction"}
    # Sorted so the issue order is stable; raw set iteration order of strings
    # changes between interpreter runs.
    for type_name in sorted(required_types - declared):
        issues.append(LintIssue(
            severity="warning",
            code="missing_entity_type",
            target=type_name,
            message=f"缺少常用实体类型: {type_name}",
            suggestion=f"建议添加 {type_name} 实体类型定义",
        ))

    # Rule 2: every entity type must expose a ``name`` field.
    for et in entity_types:
        # ``or []`` mirrors the entity_types guard: an explicit null must not
        # crash the fallback linter.
        fields = et.get("fields") or []
        field_names = {f.get("field_name", "") for f in fields}
        if "name" not in field_names:
            issues.append(LintIssue(
                severity="error",
                code="missing_field",
                target=f"{et.get('entity_type')}.name",
                message=f"实体类型 {et.get('entity_type')} 缺少必填字段: name",
                suggestion="所有实体类型必须包含 name 字段",
            ))

    score = max(0, 100 - len(issues) * 10)
    return SchemaLintResult(
        issues=issues,
        score=score,
        summary=f"发现 {len(issues)} 个问题" if issues else "Schema 检查通过",
    )
def _coerce_score(raw: object, default: int = 80) -> int:
    """Turn an LLM-supplied score into an int clamped to 0..100, else *default*."""
    if isinstance(raw, bool):
        # bool is an int subclass; True/False is not a meaningful score.
        return default
    try:
        value = int(float(raw))
    except (TypeError, ValueError, OverflowError):
        # None, non-numeric text, NaN or infinity.
        return default
    return max(0, min(100, value))


async def lint_schema(
    schema_dict: dict,
    llm: LlmClient | None = None,
) -> SchemaLintResult:
    """Lint an ontology schema, preferring the LLM and falling back to rules.

    When ``llm`` is not given, a client is built from settings if an API key
    is configured. If a client exists and reports itself available, the
    schema is sent to it and the JSON reply is converted to a
    ``SchemaLintResult``. In every other case — no client, client
    unavailable, or any failure on the LLM path — the rule-based linter
    produces the result.

    Args:
        schema_dict: The ontology schema as a dict.
        llm: Optional LLM client; created from settings when omitted.

    Returns:
        The lint result from the LLM, or from ``_rule_based_lint`` as fallback.
    """
    if llm is None:
        llm = LlmClient.from_settings() if settings.llm_api_key else None

    if llm and llm.available():
        try:
            schema_json = json.dumps(schema_dict, ensure_ascii=False, indent=2)
            result = llm.chat_json(
                system=SYSTEM_PROMPT,
                user=USER_TEMPLATE.format(schema_json=schema_json),
            )
            # NOTE(review): whether chat_json is sync or async is not visible
            # from this module; awaiting only when needed supports both.
            if inspect.isawaitable(result):
                result = await result
            if not isinstance(result, dict):
                raise TypeError(
                    f"expected a JSON object from the LLM, got {type(result).__name__}"
                )

            raw_issues = result.get("issues")
            if raw_issues is None:
                raw_issues = []
            elif not isinstance(raw_issues, list):
                raise TypeError(
                    f"expected 'issues' to be a list, got {type(raw_issues).__name__}"
                )

            issues = [
                LintIssue(
                    # ``or`` also covers explicit nulls in the model output.
                    severity=item.get("severity") or "warning",
                    code=item.get("code") or "",
                    target=item.get("target") or "",
                    message=item.get("message") or "",
                    suggestion=item.get("suggestion") or "",
                )
                for item in raw_issues
                if isinstance(item, dict)  # skip malformed entries, keep the rest
            ]
            return SchemaLintResult(
                issues=issues,
                score=_coerce_score(result.get("score")),
                summary=result.get("summary") or "",
            )
        except Exception:
            # Deliberate best-effort boundary: any LLM failure degrades to the
            # rule-based linter, but the cause is logged instead of discarded.
            logging.getLogger(__name__).warning(
                "LLM schema lint failed; falling back to rule-based lint",
                exc_info=True,
            )

    return _rule_based_lint(schema_dict)