Initial travel knowledge graph release

This commit is contained in:
2026-06-09 09:56:26 +08:00
commit 5f061295d8
402 changed files with 103877 additions and 0 deletions

View File

@@ -0,0 +1,218 @@
-- Knowledge-graph (KG) core + spatial schema draft ("new2").
-- Safe direction: additive migration. The statements in this script use
-- IF NOT EXISTS and it does not drop legacy tables.
-- Requires PostgreSQL with the PostGIS extension installed: the geometry
-- column type and the ST_* functions used by kg_place_spatial come from it.
CREATE EXTENSION IF NOT EXISTS postgis;
-- KG entity rows.
-- id is a surrogate key; the business identity of a row is the
-- (tenant_id, project_id, natural_key) triple enforced by the UNIQUE
-- constraint at the bottom.
-- updated_at only receives an insert-time default here; nothing in this
-- statement refreshes it on UPDATE.
CREATE TABLE IF NOT EXISTS kg_entities (
    id           BIGSERIAL        PRIMARY KEY,

    -- Scope: project_id falls back to 'default' when omitted.
    tenant_id    TEXT             NOT NULL,
    project_id   TEXT             NOT NULL DEFAULT 'default',

    -- Identity and descriptive payload.
    natural_key  TEXT             NOT NULL,
    entity_type  TEXT             NOT NULL,
    name         TEXT             NOT NULL,
    aliases      JSONB            NOT NULL DEFAULT '[]'::jsonb,
    description  TEXT             NOT NULL DEFAULT '',
    attributes   JSONB            NOT NULL DEFAULT '{}'::jsonb,

    -- New rows start at confidence 0.5 with status 'pending_review'.
    confidence   DOUBLE PRECISION NOT NULL DEFAULT 0.5,
    status       TEXT             NOT NULL DEFAULT 'pending_review',

    created_at   TIMESTAMPTZ      NOT NULL DEFAULT now(),
    updated_at   TIMESTAMPTZ      NOT NULL DEFAULT now(),

    UNIQUE (tenant_id, project_id, natural_key)
);
-- KG event rows.
-- Same scoping/identity scheme as the entity table: a surrogate id plus a
-- UNIQUE (tenant_id, project_id, natural_key) business key.
CREATE TABLE IF NOT EXISTS kg_events (
    id            BIGSERIAL        PRIMARY KEY,

    tenant_id     TEXT             NOT NULL,
    project_id    TEXT             NOT NULL DEFAULT 'default',

    natural_key   TEXT             NOT NULL,
    event_type    TEXT             NOT NULL,
    title         TEXT             NOT NULL,

    -- Both time columns are free text defaulting to ''.
    -- NOTE(review): presumably time_text is the raw expression and
    -- time_norm its normalized form; confirm with the writer code.
    time_text     TEXT             NOT NULL DEFAULT '',
    time_norm     TEXT             NOT NULL DEFAULT '',

    description   TEXT             NOT NULL DEFAULT '',
    participants  JSONB            NOT NULL DEFAULT '[]'::jsonb,
    attributes    JSONB            NOT NULL DEFAULT '{}'::jsonb,

    confidence    DOUBLE PRECISION NOT NULL DEFAULT 0.5,
    status        TEXT             NOT NULL DEFAULT 'pending_review',

    created_at    TIMESTAMPTZ      NOT NULL DEFAULT now(),
    updated_at    TIMESTAMPTZ      NOT NULL DEFAULT now(),

    UNIQUE (tenant_id, project_id, natural_key)
);
-- KG concept rows, keyed per tenant/project by concept_key.
CREATE TABLE IF NOT EXISTS kg_concepts (
    id            BIGSERIAL        PRIMARY KEY,

    tenant_id     TEXT             NOT NULL,
    project_id    TEXT             NOT NULL DEFAULT 'default',

    concept_key   TEXT             NOT NULL,
    name          TEXT             NOT NULL,
    concept_type  TEXT             NOT NULL DEFAULT 'Concept',

    -- Nullable and not backed by a foreign key.
    -- NOTE(review): presumably the concept_key of a parent concept; confirm.
    parent_key    TEXT,

    description   TEXT             NOT NULL DEFAULT '',
    attributes    JSONB            NOT NULL DEFAULT '{}'::jsonb,

    confidence    DOUBLE PRECISION NOT NULL DEFAULT 0.5,
    status        TEXT             NOT NULL DEFAULT 'pending_review',

    created_at    TIMESTAMPTZ      NOT NULL DEFAULT now(),
    updated_at    TIMESTAMPTZ      NOT NULL DEFAULT now(),

    UNIQUE (tenant_id, project_id, concept_key)
);
-- Relation definitions: one row per
-- (tenant_id, project_id, relation_type, source_type, target_type).
-- Unlike the other kg_* tables in this script, this one has no updated_at
-- or attributes column.
CREATE TABLE IF NOT EXISTS kg_relations (
    id                BIGSERIAL        PRIMARY KEY,

    tenant_id         TEXT             NOT NULL,
    project_id        TEXT             NOT NULL DEFAULT 'default',

    relation_type     TEXT             NOT NULL,
    source_type       TEXT             NOT NULL,
    target_type       TEXT             NOT NULL,
    description       TEXT             NOT NULL DEFAULT '',

    -- Optional; NULL when no inverse is recorded.
    inverse_relation  TEXT,

    confidence        DOUBLE PRECISION NOT NULL DEFAULT 0.5,
    status            TEXT             NOT NULL DEFAULT 'pending_review',
    created_at        TIMESTAMPTZ      NOT NULL DEFAULT now(),

    UNIQUE (tenant_id, project_id, relation_type, source_type, target_type)
);
-- Statements are the auditable candidate-fact layer. Each row reads as
--     subject -[predicate]-> object
-- and carries its evidence ids, confidence and qualifiers alongside.
-- Subject and object are addressed by a (kind, key) pair of TEXT columns;
-- no foreign key ties them to the other kg_* tables, and no UNIQUE
-- constraint is declared on the triple.
CREATE TABLE IF NOT EXISTS kg_statements (
    id                BIGSERIAL        PRIMARY KEY,

    tenant_id         TEXT             NOT NULL,
    project_id        TEXT             NOT NULL DEFAULT 'default',

    -- The triple itself.
    subject_kind      TEXT             NOT NULL,
    subject_key       TEXT             NOT NULL,
    predicate         TEXT             NOT NULL,
    object_kind       TEXT             NOT NULL,
    object_key        TEXT             NOT NULL,

    -- Supporting metadata, all JSONB with empty defaults.
    qualifiers        JSONB            NOT NULL DEFAULT '{}'::jsonb,
    evidence_ids      JSONB            NOT NULL DEFAULT '[]'::jsonb,
    source_platforms  JSONB            NOT NULL DEFAULT '[]'::jsonb,
    extractor_votes   JSONB            NOT NULL DEFAULT '[]'::jsonb,

    confidence        DOUBLE PRECISION NOT NULL DEFAULT 0.5,
    status            TEXT             NOT NULL DEFAULT 'pending_review',

    created_at        TIMESTAMPTZ      NOT NULL DEFAULT now(),
    updated_at        TIMESTAMPTZ      NOT NULL DEFAULT now()
);

-- Lookup by subject within a tenant/project.
CREATE INDEX IF NOT EXISTS idx_kg_statements_subj
    ON kg_statements (tenant_id, project_id, subject_kind, subject_key);

-- Lookup by object within a tenant/project.
CREATE INDEX IF NOT EXISTS idx_kg_statements_obj
    ON kg_statements (tenant_id, project_id, object_kind, object_key);

-- Lookup by predicate within a tenant/project.
CREATE INDEX IF NOT EXISTS idx_kg_statements_predicate
    ON kg_statements (tenant_id, project_id, predicate);
-- Links a statement to an evidence row stored in another table.
-- The evidence side is a soft reference: (evidence_table, evidence_id) are
-- plain TEXT columns with no foreign key, and evidence_table defaults to
-- 'social_evidence'.
CREATE TABLE IF NOT EXISTS kg_evidence_links (
    id              BIGSERIAL   PRIMARY KEY,
    tenant_id       TEXT        NOT NULL,
    project_id      TEXT        NOT NULL DEFAULT 'default',

    -- Deleting a statement removes its links (ON DELETE CASCADE).
    -- NOTE(review): the column is nullable, so links without a statement
    -- are accepted; confirm that is intended.
    statement_id    BIGINT      REFERENCES kg_statements(id) ON DELETE CASCADE,

    evidence_table  TEXT        NOT NULL DEFAULT 'social_evidence',
    evidence_id     TEXT        NOT NULL,
    support_type    TEXT        NOT NULL DEFAULT 'supports',
    quote           TEXT        NOT NULL DEFAULT '',
    created_at      TIMESTAMPTZ NOT NULL DEFAULT now()
);

-- PostgreSQL does not index the referencing side of a foreign key on its
-- own. Without this index every cascaded delete from kg_statements, and
-- every "links for statement X" lookup, has to scan the whole table.
CREATE INDEX IF NOT EXISTS idx_kg_evidence_links_statement
    ON kg_evidence_links (statement_id);
-- Proposed schema changes awaiting review.
-- New rows start with status 'pending_review'; reviewer and review_note
-- stay NULL until someone fills them in.
CREATE TABLE IF NOT EXISTS kg_schema_proposals (
    id             BIGSERIAL        PRIMARY KEY,

    tenant_id      TEXT             NOT NULL,
    project_id     TEXT             NOT NULL DEFAULT 'default',

    proposal_type  TEXT             NOT NULL,
    name           TEXT             NOT NULL,

    -- Optional; which of these apply depends on the proposal.
    source_type    TEXT,
    target_type    TEXT,
    value_type     TEXT,

    reason         TEXT             NOT NULL DEFAULT '',
    examples       JSONB            NOT NULL DEFAULT '[]'::jsonb,
    evidence_ids   JSONB            NOT NULL DEFAULT '[]'::jsonb,

    confidence     DOUBLE PRECISION NOT NULL DEFAULT 0.5,
    status         TEXT             NOT NULL DEFAULT 'pending_review',
    reviewer       TEXT,
    review_note    TEXT,

    created_at     TIMESTAMPTZ      NOT NULL DEFAULT now(),
    updated_at     TIMESTAMPTZ      NOT NULL DEFAULT now()
);

-- Supports listing proposals by status (and then type) per tenant/project.
CREATE INDEX IF NOT EXISTS idx_kg_schema_proposals_status
    ON kg_schema_proposals (tenant_id, project_id, status, proposal_type);
-- Stored schema versions; a version label is unique within a
-- tenant/project. Rows start as 'draft' and are attributed to 'system'
-- unless the writer says otherwise.
CREATE TABLE IF NOT EXISTS kg_schema_versions (
    id                   BIGSERIAL   PRIMARY KEY,

    tenant_id            TEXT        NOT NULL,
    project_id           TEXT        NOT NULL DEFAULT 'default',

    version              TEXT        NOT NULL,
    status               TEXT        NOT NULL DEFAULT 'draft',

    -- The schema document and the proposal ids recorded with it.
    schema_jsonb         JSONB       NOT NULL DEFAULT '{}'::jsonb,
    source_proposal_ids  JSONB       NOT NULL DEFAULT '[]'::jsonb,

    created_by           TEXT        NOT NULL DEFAULT 'system',
    created_at           TIMESTAMPTZ NOT NULL DEFAULT now(),

    UNIQUE (tenant_id, project_id, version)
);
-- Spatial layer for Place-like entities.
-- Each row stores a lng/lat pair plus a PostGIS point derived from it, and
-- optional H3 cell ids at resolutions 7 through 10.
--
-- NOTE(review): the primary key is entity_key alone even though the table
-- also carries tenant_id/project_id, so the same entity_key cannot exist in
-- two tenants or projects. Widening the key would change the conflict
-- target callers can use in ON CONFLICT, so it is left unchanged here.
--
-- The CHECK constraints only take effect when this statement actually
-- creates the table; an already-existing kg_place_spatial is left as-is by
-- IF NOT EXISTS.
CREATE TABLE IF NOT EXISTS kg_place_spatial (
    entity_key  TEXT             PRIMARY KEY,
    tenant_id   TEXT             NOT NULL,
    project_id  TEXT             NOT NULL DEFAULT 'default',
    name        TEXT             NOT NULL,

    -- Coordinates in degrees. ST_MakePoint takes (x, y) = (lng, lat).
    lng         DOUBLE PRECISION NOT NULL,
    lat         DOUBLE PRECISION NOT NULL,

    -- Derived from lng/lat and stored, so it can carry the GiST index below.
    geom        geometry(Point, 4326) GENERATED ALWAYS AS (
        ST_SetSRID(ST_MakePoint(lng, lat), 4326)
    ) STORED,

    -- H3 cell ids per resolution; nullable, filled in by the writer.
    h3_r7       TEXT,
    h3_r8       TEXT,
    h3_r9       TEXT,
    h3_r10      TEXT,

    address     TEXT             NOT NULL DEFAULT '',
    adcode      TEXT,
    district    TEXT,
    source      TEXT             NOT NULL DEFAULT '',
    updated_at  TIMESTAMPTZ      NOT NULL DEFAULT now(),

    -- Reject swapped or garbage coordinates up front; without these the
    -- generated point silently stores a position that is not on the globe.
    CONSTRAINT kg_place_spatial_lng_check CHECK (lng BETWEEN -180 AND 180),
    CONSTRAINT kg_place_spatial_lat_check CHECK (lat BETWEEN -90 AND 90)
);

-- Spatial index on the generated point.
CREATE INDEX IF NOT EXISTS idx_kg_place_spatial_geom
    ON kg_place_spatial USING GIST (geom);

-- Cell lookups at the two finest stored resolutions.
CREATE INDEX IF NOT EXISTS idx_kg_place_spatial_h3_r9
    ON kg_place_spatial (tenant_id, project_id, h3_r9);
CREATE INDEX IF NOT EXISTS idx_kg_place_spatial_h3_r10
    ON kg_place_spatial (tenant_id, project_id, h3_r10);
-- One row per H3 cell id. The table has no tenant/project columns, so rows
-- are shared across all scopes.
CREATE TABLE IF NOT EXISTS kg_geo_cells (
    h3_id         TEXT             PRIMARY KEY,
    resolution    INTEGER          NOT NULL,

    -- Nullable and not backed by a foreign key.
    parent_h3_id  TEXT,

    -- Cell centre; both parts are optional.
    center_lng    DOUBLE PRECISION,
    center_lat    DOUBLE PRECISION,

    -- JSONB payloads with empty defaults.
    boundary      JSONB            NOT NULL DEFAULT '[]'::jsonb,
    stats         JSONB            NOT NULL DEFAULT '{}'::jsonb,

    updated_at    TIMESTAMPTZ      NOT NULL DEFAULT now()
);
-- Cache of routing results. Route distance is expensive to obtain, so only
-- top-K / high-value origin-target pairs are meant to be stored here.
-- A cached route is unique per
-- (tenant, project, origin kind+key, target entity, travel mode, provider).
CREATE TABLE IF NOT EXISTS kg_route_metrics (
    id                   BIGSERIAL        PRIMARY KEY,

    tenant_id            TEXT             NOT NULL,
    project_id           TEXT             NOT NULL DEFAULT 'default',

    -- Route endpoints; the origin defaults to an H3 cell.
    origin_kind          TEXT             NOT NULL DEFAULT 'h3_cell',
    origin_key           TEXT             NOT NULL,
    target_entity_key    TEXT             NOT NULL,
    travel_mode          TEXT             NOT NULL DEFAULT 'walking',

    -- Measurements, all nullable.
    -- NOTE(review): units presumably metres (_m) and seconds (_s) per the
    -- column suffixes; confirm against the writer.
    straight_distance_m  DOUBLE PRECISION,
    route_distance_m     DOUBLE PRECISION,
    duration_s           DOUBLE PRECISION,

    provider             TEXT             NOT NULL DEFAULT 'amap',
    path_summary         TEXT             NOT NULL DEFAULT '',
    raw_jsonb            JSONB            NOT NULL DEFAULT '{}'::jsonb,

    -- NULL means no expiry is recorded for the row.
    expires_at           TIMESTAMPTZ,
    created_at           TIMESTAMPTZ      NOT NULL DEFAULT now(),
    updated_at           TIMESTAMPTZ      NOT NULL DEFAULT now(),

    UNIQUE (tenant_id, project_id, origin_kind, origin_key, target_entity_key, travel_mode, provider)
);

-- Fetch every cached target for one origin and travel mode.
CREATE INDEX IF NOT EXISTS idx_kg_route_metrics_lookup
    ON kg_route_metrics (tenant_id, project_id, origin_kind, origin_key, travel_mode);