Initial travel knowledge graph release
This commit is contained in:
117
scripts/fix_huaxi_event_temporal_fields.py
Normal file
117
scripts/fix_huaxi_event_temporal_fields.py
Normal file
@@ -0,0 +1,117 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Backfill Huaxi Event temporal/type/evidence fields into FalkorDB.
|
||||
|
||||
The extraction JSON already contains event_type and time_text/time_norm.
|
||||
This script makes those fields explicit on Event nodes and HAS_EVENT edges
|
||||
so graph browsing, timelines, and future event retrieval do not have to infer
|
||||
time from descriptions.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
# Project root, prepended to sys.path before the project-local import below
# (`app.config`) so the script can be executed directly.
# NOTE(review): this is an absolute, machine-specific path — confirm it matches
# the checkout location before running the script on another machine.
ROOT = Path("/Users/xuexue/new2")
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))
|
||||
|
||||
from falkordb import FalkorDB # noqa: E402
|
||||
|
||||
from app.config import settings # noqa: E402
|
||||
|
||||
# Extraction output whose "events" list supplies the rows to backfill.
IN_JSON = ROOT / "docs/reports/huaxi_kg_schema_v1_ready.json"

# element_id of the Place node whose HAS_EVENT edges are updated.
HUAXI_ELEMENT_ID = "amap:B035300A51"

# Provenance written onto every updated Event node and HAS_EVENT edge.
# The source id is also the fallback evidence_id for rows whose first
# source span carries no evidence_id of its own.
BAIDU_BAIKE_SOURCE_ID = "baidu_baike_huaxi_park"
BAIDU_BAIKE_SOURCE_NAME = "百度百科"
BAIDU_BAIKE_SOURCE_URL = "https://baike.baidu.com/item/%E8%8A%B1%E6%BA%AA%E5%85%AC%E5%9B%AD"
|
||||
|
||||
|
||||
def first_span(row: dict[str, Any]) -> dict[str, Any]:
    """Return the first entry of ``row["source_spans"]`` when it is a dict.

    Args:
        row: One extracted record; only its ``"source_spans"`` key is read.

    Returns:
        The first span dict, or an empty dict when ``source_spans`` is
        missing, empty, not a list/tuple, or its first item is not a dict.
    """
    spans = row.get("source_spans")
    # Check the container type before indexing: ``[0]`` on a dict or a
    # scalar would raise instead of falling back to the empty result.
    if not isinstance(spans, (list, tuple)) or not spans:
        return {}
    head = spans[0]
    return head if isinstance(head, dict) else {}
|
||||
|
||||
|
||||
# Sets the explicit temporal/type/evidence properties on one Event node and
# returns how many nodes matched, so the caller can tell a real update from
# a no-op MATCH.
_EVENT_NODE_QUERY = """
MATCH (e:Event {id:$id})
SET e.event_id=$id,
    e.title=$title,
    e.name=$title,
    e.event_type=$event_type,
    e.event_date=$event_date,
    e.event_time=$event_date,
    e.event_date_norm=$event_date_norm,
    e.time_text=$event_date,
    e.time_norm=$event_date_norm,
    e.description=$description,
    e.evidence_id=$evidence_id,
    e.evidence_quote=$evidence_quote,
    e.evidence_url=$source_url,
    e.source='baidu_baike',
    e.source_name=$source_name,
    e.source_url=$source_url,
    e.extraction_schema='kg_schema_v1',
    e.review_status='auto_published',
    e.confidence=$confidence
RETURN count(e) AS matched
"""

# Mirrors the type/time/evidence properties onto the HAS_EVENT edge that
# links the Huaxi Place node to the event.
_HAS_EVENT_EDGE_QUERY = """
MATCH (p:Place {element_id:$huaxi})-[r:HAS_EVENT]->(e:Event {id:$id})
SET r.event_type=$event_type,
    r.event_date=$event_date,
    r.event_time=$event_date,
    r.event_date_norm=$event_date_norm,
    r.evidence_id=$evidence_id,
    r.evidence_quote=$evidence_quote,
    r.evidence_url=$source_url,
    r.source='baidu_baike',
    r.source_name=$source_name,
    r.source_url=$source_url,
    r.extraction_schema='kg_schema_v1',
    r.confidence=$confidence
"""


def _event_params(row: dict[str, Any]) -> dict[str, Any]:
    """Build the Cypher parameter map for one extracted event row.

    Missing or falsy fields become empty strings (``confidence`` becomes
    0.0); ``evidence_id`` falls back to the Baidu Baike source id.
    """
    span = first_span(row)
    return {
        "huaxi": HUAXI_ELEMENT_ID,
        "id": row.get("temp_id") or "",
        "title": row.get("title") or "",
        "event_type": row.get("event_type") or "",
        "event_date": row.get("time_text") or "",
        "event_date_norm": row.get("time_norm") or "",
        "description": row.get("description") or "",
        "evidence_id": span.get("evidence_id") or BAIDU_BAIKE_SOURCE_ID,
        "evidence_quote": span.get("quote") or "",
        "source_name": BAIDU_BAIKE_SOURCE_NAME,
        "source_url": BAIDU_BAIKE_SOURCE_URL,
        "confidence": float(row.get("confidence") or 0),
    }


def main() -> None:
    """Backfill event fields from ``IN_JSON`` onto Event nodes and edges.

    Reads the ``"events"`` list from the extraction JSON, and for each row
    that has a ``temp_id`` updates the matching ``Event`` node and its
    ``HAS_EVENT`` edge from the Huaxi ``Place`` node. Rows without an id, or
    whose id matches no ``Event`` node, are skipped and not counted.

    Prints a summary dict with the graph name, the number of events actually
    updated, and the Huaxi anchor element id.
    """
    payload = json.loads(IN_JSON.read_text(encoding="utf-8"))
    graph = FalkorDB(
        host=settings.falkordb_host,
        port=settings.falkordb_port,
    ).select_graph(settings.falkordb_graph)

    updated = 0
    # `or []` also covers an explicit `"events": null` in the JSON.
    for row in payload.get("events") or []:
        params = _event_params(row)
        if not params["id"]:
            continue

        node_result = graph.query(_EVENT_NODE_QUERY, params)
        rows = node_result.result_set
        matched = rows[0][0] if rows and rows[0] else 0
        if not matched:
            # No Event node with this id: the edge MATCH cannot succeed
            # either, and counting the row would overstate the backfill.
            continue

        graph.query(_HAS_EVENT_EDGE_QUERY, params)
        updated += 1

    print({
        "graph": settings.falkordb_graph,
        "updated_events": updated,
        "huaxi_anchor": HUAXI_ELEMENT_ID,
    })
|
||||
|
||||
|
||||
# Run the backfill only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user