#!/usr/bin/env python3
"""Backfill Huaxi Event temporal/type/evidence fields into FalkorDB.

The extraction JSON already contains event_type and time_text/time_norm.
This script makes those fields explicit on Event nodes and HAS_EVENT edges so
graph browsing, timelines, and future event retrieval do not have to infer
time from descriptions.
"""

from __future__ import annotations

import json
import sys
from pathlib import Path
from typing import Any

# Make the repository root importable so ``app.config`` resolves when this
# file is executed directly as a script.
ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

from falkordb import FalkorDB  # noqa: E402

from app.config import settings  # noqa: E402

IN_JSON = ROOT / "docs/reports/huaxi_kg_schema_v1_ready.json"
HUAXI_ELEMENT_ID = "amap:B035300A51"
BAIDU_BAIKE_SOURCE_ID = "baidu_baike_huaxi_park"
BAIDU_BAIKE_SOURCE_NAME = "百度百科"
BAIDU_BAIKE_SOURCE_URL = "https://baike.baidu.com/item/%E8%8A%B1%E6%BA%AA%E5%85%AC%E5%9B%AD"

# Sets the explicit temporal/type/evidence properties on one Event node.
# ``RETURN count(e)`` lets the caller tell whether the MATCH found a node at
# all; without it a missing Event is indistinguishable from a successful update.
_SET_EVENT_QUERY = """
MATCH (e:Event {id:$id})
SET e.event_id=$id,
    e.title=$title,
    e.name=$title,
    e.event_type=$event_type,
    e.event_date=$event_date,
    e.event_time=$event_date,
    e.event_date_norm=$event_date_norm,
    e.time_text=$event_date,
    e.time_norm=$event_date_norm,
    e.description=$description,
    e.evidence_id=$evidence_id,
    e.evidence_quote=$evidence_quote,
    e.evidence_url=$source_url,
    e.source='baidu_baike',
    e.source_name=$source_name,
    e.source_url=$source_url,
    e.extraction_schema='kg_schema_v1',
    e.review_status='auto_published',
    e.confidence=$confidence
RETURN count(e) AS matched
"""

# Mirrors the same fields onto the HAS_EVENT edge from the Huaxi Place anchor.
_SET_EDGE_QUERY = """
MATCH (p:Place {element_id:$huaxi})-[r:HAS_EVENT]->(e:Event {id:$id})
SET r.event_type=$event_type,
    r.event_date=$event_date,
    r.event_time=$event_date,
    r.event_date_norm=$event_date_norm,
    r.evidence_id=$evidence_id,
    r.evidence_quote=$evidence_quote,
    r.evidence_url=$source_url,
    r.source='baidu_baike',
    r.source_name=$source_name,
    r.source_url=$source_url,
    r.extraction_schema='kg_schema_v1',
    r.confidence=$confidence
"""


def first_span(row: dict[str, Any]) -> dict[str, Any]:
    """Return the first entry of ``row["source_spans"]`` if it is a dict.

    Returns an empty dict when the key is missing, empty/falsy, or its first
    element is not a dict, so callers can use ``.get`` unconditionally.
    """
    spans = row.get("source_spans") or []
    if spans and isinstance(spans[0], dict):
        return spans[0]
    return {}


def _event_params(row: dict[str, Any]) -> dict[str, Any]:
    """Build the Cypher parameter dict for one extracted event row.

    Missing or falsy text fields become ``""``; a missing ``confidence``
    becomes ``0.0``; the evidence id falls back to the Baidu Baike source id.
    """
    span = first_span(row)
    return {
        "huaxi": HUAXI_ELEMENT_ID,
        "id": row.get("temp_id") or "",
        "title": row.get("title") or "",
        "event_type": row.get("event_type") or "",
        "event_date": row.get("time_text") or "",
        "event_date_norm": row.get("time_norm") or "",
        "description": row.get("description") or "",
        "evidence_id": span.get("evidence_id") or BAIDU_BAIKE_SOURCE_ID,
        "evidence_quote": span.get("quote") or "",
        "source_name": BAIDU_BAIKE_SOURCE_NAME,
        "source_url": BAIDU_BAIKE_SOURCE_URL,
        "confidence": float(row.get("confidence") or 0),
    }


def _matched_count(result: Any) -> int:
    """Read the single ``count(e)`` cell from a query result, or 0 if absent."""
    rows = getattr(result, "result_set", None) or []
    if rows and rows[0]:
        return int(rows[0][0])
    return 0


def main() -> None:
    """Apply the backfill for every event in ``IN_JSON`` and print a summary.

    Rows without a ``temp_id`` are skipped. An event only counts as updated
    when its Event node actually exists in the graph; rows whose id matches no
    node are tallied separately as ``unmatched_events``.
    """
    payload = json.loads(IN_JSON.read_text(encoding="utf-8"))
    graph = FalkorDB(
        host=settings.falkordb_host,
        port=settings.falkordb_port,
    ).select_graph(settings.falkordb_graph)

    updated = 0
    unmatched = 0
    # ``or []`` also covers an explicit ``"events": null`` in the JSON.
    for row in payload.get("events") or []:
        params = _event_params(row)
        if not params["id"]:
            continue

        result = graph.query(_SET_EVENT_QUERY, params)
        if not _matched_count(result):
            # The edge MATCH requires this same Event node, so it cannot
            # succeed either; skip the second round trip.
            unmatched += 1
            continue

        graph.query(_SET_EDGE_QUERY, params)
        updated += 1

    print({
        "graph": settings.falkordb_graph,
        "updated_events": updated,
        "huaxi_anchor": HUAXI_ELEMENT_ID,
        "unmatched_events": unmatched,
    })


if __name__ == "__main__":
    main()