Files
bxh/scripts/cleanup_huaxi_demo_duplicates.py

81 lines
2.5 KiB
Python

#!/usr/bin/env python3
"""Remove Huaxi demo duplicate nodes after their knowledge is on canonical POIs."""
from __future__ import annotations
import sys
from pathlib import Path
# The script lives one directory below the project root; put that root at the
# front of sys.path so the `app` package resolves when the file is run directly.
ROOT = Path(__file__).resolve().parents[1]
_root_dir = str(ROOT)
if _root_dir not in sys.path:
    sys.path.insert(0, _root_dir)
from falkordb import FalkorDB # noqa: E402
from app.config import settings # noqa: E402
# element_id values of the canonical AMap :Place nodes that the demo's
# temporary extraction nodes (ent_*) are folded into by main().
HUAXI = "amap:B035300A51"  # canonical Huaxi Park sight POI
TIANHETAN = "amap:B035300A2B"  # existing AMap Tianhetan tourist resort POI
QINGYAN = "amap:B035300ESE"  # existing AMap Qingyan ancient town POI
def _missing_canonical_places(g, element_ids: tuple[str, ...]) -> list[str]:
    """Return the element_ids that have no matching :Place node in graph *g*."""
    missing = []
    for element_id in element_ids:
        res = g.query(
            "MATCH (p:Place {element_id:$element_id}) RETURN count(p)",
            {"element_id": element_id},
        )
        found = res.result_set[0][0] if res.result_set else 0
        if not found:
            missing.append(element_id)
    return missing


def main() -> None:
    """Fold the Huaxi demo's temporary extraction nodes into canonical AMap POIs.

    Steps, in order:

    1. Check that the three canonical :Place nodes (HUAXI, TIANHETAN, QINGYAN)
       exist; abort before touching anything if one is missing.
    2. Delete the duplicate ``ent_huaxi_park`` node.
    3. MERGE ``NEARBY_ATTRACTION`` edges from the canonical Huaxi place to the
       canonical Tianhetan and Qingyan places and stamp them with provenance.
    4. Delete the temporary ``ent_tianhetan`` / ``ent_qingyan`` nodes.
    5. Tag the canonical Huaxi place with ``source_extraction`` and drop its
       ``kg_id`` property.
    6. Count any surviving temporary nodes and print a summary dict.

    Raises:
        SystemExit: if any canonical :Place node is absent from the graph.
    """
    g = FalkorDB(host=settings.falkordb_host, port=settings.falkordb_port).select_graph(
        settings.falkordb_graph
    )

    # Without this guard the MATCH clauses below would yield no rows, the MERGE
    # would silently do nothing, and the DETACH DELETEs would still run --
    # discarding the extracted knowledge instead of relocating it.
    missing = _missing_canonical_places(g, (HUAXI, TIANHETAN, QINGYAN))
    if missing:
        raise SystemExit(
            f"Canonical Place node(s) not found: {missing}; "
            "aborting before deleting any demo nodes."
        )

    # Keep one canonical Huaxi Park: the AMap sight POI with coordinates.
    # The kg_schema_v1 facts were already mirrored to this node.
    g.query("MATCH (n {id:'ent_huaxi_park'}) DETACH DELETE n")

    # Redirect nearby attractions to existing AMap scenic POIs, then remove temp nodes.
    # MERGE reuses an existing edge, so re-running the script does not duplicate it.
    g.query(
        """
        MATCH (h:Place {element_id:$huaxi})
        MATCH (t:Place {element_id:$tianhetan})
        MERGE (h)-[r:NEARBY_ATTRACTION]->(t)
        SET r.confidence=0.82,
            r.evidence_quote='逆流而行8公里即可抵达天河潭',
            r.source='kg_schema_v1+entity_alignment',
            r.alignment_note='target aligned from ent_tianhetan to existing AMap 天河潭旅游度假区'
        """,
        {"huaxi": HUAXI, "tianhetan": TIANHETAN},
    )
    g.query(
        """
        MATCH (h:Place {element_id:$huaxi})
        MATCH (q:Place {element_id:$qingyan})
        MERGE (h)-[r:NEARBY_ATTRACTION]->(q)
        SET r.confidence=0.84,
            r.evidence_quote='花溪以南12公里处的青岩镇',
            r.source='kg_schema_v1+entity_alignment',
            r.alignment_note='target aligned from ent_qingyan to existing AMap 青岩古镇'
        """,
        {"huaxi": HUAXI, "qingyan": QINGYAN},
    )
    g.query("MATCH (n {id:'ent_tianhetan'}) DETACH DELETE n")
    g.query("MATCH (n {id:'ent_qingyan'}) DETACH DELETE n")

    # Remove confusing extraction id from canonical display properties.
    g.query(
        """
        MATCH (p:Place {element_id:$huaxi})
        SET p.source_extraction='kg_schema_v1'
        REMOVE p.kg_id
        """,
        {"huaxi": HUAXI},
    )

    # Verify the cleanup: no node should still carry one of the temporary ids.
    res = g.query(
        """
        MATCH (n)
        WHERE n.id IN ['ent_huaxi_park','ent_tianhetan','ent_qingyan']
        RETURN count(n)
        """
    )
    remaining = res.result_set[0][0] if res.result_set else None
    # Report success only when the verification query confirms it.
    print(
        {
            "removed_demo_duplicates": remaining == 0,
            "remaining_target_temp_nodes": remaining,
        }
    )
# Run the cleanup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()