Initial travel knowledge graph release
This commit is contained in:
80
scripts/cleanup_huaxi_demo_duplicates.py
Normal file
80
scripts/cleanup_huaxi_demo_duplicates.py
Normal file
@@ -0,0 +1,80 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Remove Huaxi demo duplicate nodes after their knowledge is on canonical POIs."""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Project root, derived from this file's location (<root>/scripts/<this file>)
# instead of a hard-coded, machine-specific absolute path, so the script runs
# from any checkout. It is put on sys.path so that the `app` package imported
# further down can be resolved when the script is executed directly.
ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))
|
||||
|
||||
from falkordb import FalkorDB # noqa: E402
|
||||
|
||||
from app.config import settings # noqa: E402
|
||||
|
||||
# `element_id` values of the canonical `Place` nodes that survive the cleanup.
# The "amap:" prefix and the alignment notes written by main() indicate these
# are AMap POI ids.
HUAXI = "amap:B035300A51"  # Huaxi Park (source of the NEARBY_ATTRACTION edges)
TIANHETAN = "amap:B035300A2B"  # target that replaces temp node ent_tianhetan
QINGYAN = "amap:B035300ESE"  # target that replaces temp node ent_qingyan
|
||||
|
||||
|
||||
# Extraction ids (`id` property) of the temporary demo nodes this script removes.
_TEMP_NODE_IDS = ("ent_huaxi_park", "ent_tianhetan", "ent_qingyan")


def _require_canonical_places(g, element_ids) -> None:
    """Raise ``RuntimeError`` unless every ``element_id`` matches a ``Place`` node.

    Run before any destructive query: without the canonical nodes the
    ``MATCH ... MERGE`` redirects silently do nothing, and deleting the temp
    nodes afterwards would drop their knowledge for good.
    """
    missing = []
    for element_id in element_ids:
        res = g.query(
            "MATCH (p:Place {element_id:$element_id}) RETURN count(p)",
            {"element_id": element_id},
        )
        found = res.result_set[0][0] if res.result_set else 0
        if not found:
            missing.append(element_id)
    if missing:
        raise RuntimeError(
            f"canonical Place node(s) not found: {missing}; "
            "refusing to delete demo duplicates"
        )


def _link_nearby_attraction(
    g, source_id, target_id, *, confidence, evidence_quote, alignment_note
) -> None:
    """MERGE a ``NEARBY_ATTRACTION`` edge between two canonical ``Place`` nodes.

    MERGE keeps the operation idempotent; the SET clause (re)writes the edge's
    provenance properties on every run.
    """
    g.query(
        """
        MATCH (h:Place {element_id:$source_id})
        MATCH (t:Place {element_id:$target_id})
        MERGE (h)-[r:NEARBY_ATTRACTION]->(t)
        SET r.confidence=$confidence,
            r.evidence_quote=$evidence_quote,
            r.source='kg_schema_v1+entity_alignment',
            r.alignment_note=$alignment_note
        """,
        {
            "source_id": source_id,
            "target_id": target_id,
            "confidence": confidence,
            "evidence_quote": evidence_quote,
            "alignment_note": alignment_note,
        },
    )


def _delete_temp_node(g, temp_id) -> None:
    """Delete every node whose ``id`` property equals *temp_id*, with its edges."""
    g.query("MATCH (n {id:$temp_id}) DETACH DELETE n", {"temp_id": temp_id})


def main() -> None:
    """Remove the Huaxi demo duplicate nodes and re-point their edges.

    Steps, against the graph named by ``settings.falkordb_graph``:

    1. Verify the three canonical ``Place`` nodes exist (abort otherwise).
    2. Delete the duplicate ``ent_huaxi_park`` node.
    3. MERGE ``NEARBY_ATTRACTION`` edges from Huaxi to the canonical Tianhetan
       and Qingyan nodes, then delete the temp nodes they replace.
    4. Tag the canonical Huaxi node with ``source_extraction`` and drop ``kg_id``.
    5. Count leftover temp nodes and print a summary dict.

    Raises:
        RuntimeError: if a canonical ``Place`` node is missing, in which case
            nothing has been deleted.
    """
    g = FalkorDB(host=settings.falkordb_host, port=settings.falkordb_port).select_graph(
        settings.falkordb_graph
    )

    # Guard against data loss before touching anything.
    _require_canonical_places(g, (HUAXI, TIANHETAN, QINGYAN))

    # Keep one canonical Huaxi Park: the AMap sight POI with coordinates.
    # The kg_schema_v1 facts were already mirrored to this node.
    _delete_temp_node(g, "ent_huaxi_park")

    # Redirect nearby attractions to existing AMap scenic POIs, then remove temp nodes.
    _link_nearby_attraction(
        g,
        HUAXI,
        TIANHETAN,
        confidence=0.82,
        evidence_quote="逆流而行8公里即可抵达天河潭",
        alignment_note="target aligned from ent_tianhetan to existing AMap 天河潭旅游度假区",
    )
    _link_nearby_attraction(
        g,
        HUAXI,
        QINGYAN,
        confidence=0.84,
        evidence_quote="花溪以南12公里处的青岩镇",
        alignment_note="target aligned from ent_qingyan to existing AMap 青岩古镇",
    )
    _delete_temp_node(g, "ent_tianhetan")
    _delete_temp_node(g, "ent_qingyan")

    # Remove confusing extraction id from canonical display properties.
    g.query(
        """
        MATCH (p:Place {element_id:$huaxi})
        SET p.source_extraction='kg_schema_v1'
        REMOVE p.kg_id
        """,
        {"huaxi": HUAXI},
    )

    # Verify the cleanup instead of assuming it worked.
    res = g.query(
        """
        MATCH (n)
        WHERE n.id IN $temp_ids
        RETURN count(n)
        """,
        {"temp_ids": list(_TEMP_NODE_IDS)},
    )
    remaining = res.result_set[0][0] if res.result_set else None
    print(
        {
            # True only when the verification query confirms nothing is left.
            "removed_demo_duplicates": remaining == 0,
            "remaining_target_temp_nodes": remaining,
        }
    )
|
||||
|
||||
|
||||
# Run the cleanup only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user