Files
bxh/app/db.py

2590 lines
108 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""PostgreSQL database layer — pool, DDL, and all CRUD operations."""
from __future__ import annotations
import asyncio
import json
from contextlib import asynccontextmanager
from typing import Any
import psycopg
from psycopg.rows import dict_row
from psycopg_pool import AsyncConnectionPool
from app.config import settings
from app.security import hash_password
# Module-wide connection pool; None until init_pool() assigns it.
_pool: AsyncConnectionPool | None = None
async def init_pool() -> None:
    """Create and open the shared async connection pool, then run startup tasks.

    The pool is built from ``settings.database_url`` with 2-10 connections
    whose cursors return rows as dicts. It is constructed closed and opened
    explicitly so the open can be awaited.
    """
    global _pool
    _pool = AsyncConnectionPool(
        conninfo=settings.database_url,
        min_size=2,
        max_size=10,
        kwargs={"row_factory": dict_row},
        open=False,
    )
    await _pool.open()
    # NOTE(review): the original indentation was lost in extraction; this
    # assumes only the migration call is guarded by the flag and that the
    # seed / reap steps always run — confirm against the raw file.
    if settings.db_migrations_enabled:
        await _run_migrations()
    await _seed_rbac()
    await _sa_reap_orphans()
async def close_pool() -> None:
    """Close the shared connection pool, if one exists, and forget it.

    The module-level reference is reset to ``None`` so that code running
    after shutdown sees "no pool" rather than a pool object that has already
    been closed. Calling this more than once is harmless.
    """
    global _pool
    # Detach first so the reference is cleared even if close() raises.
    pool, _pool = _pool, None
    if pool is not None:
        await pool.close()
@asynccontextmanager
async def get_conn():
    """Yield a connection checked out from the shared pool.

    The connection is handed back to the pool when the ``async with`` block
    that uses this context manager exits.

    Raises:
        RuntimeError: if the module-level pool has not been created yet.
    """
    # An explicit raise (unlike ``assert``) still fires under ``python -O``.
    if _pool is None:
        raise RuntimeError("DB pool not initialized")
    async with _pool.connection() as conn:
        yield conn
async def _run_migrations() -> None:
    """Ensure the configured schema exists, then apply every DDL statement.

    All statements run on a single connection and are committed once at the
    end.
    """
    create_schema = f"CREATE SCHEMA IF NOT EXISTS {settings.db_schema}"
    async with get_conn() as conn, conn.cursor() as cur:
        # The schema must exist before any schema-qualified DDL runs.
        for statement in (create_schema, *_ddl_statements()):
            await cur.execute(statement)
        await conn.commit()
# 3 roles kept by design: system administrator / operations reviewer / collector.
# Each tuple is (role_key, label, description, is_system, sort_order), matching
# the column order of the INSERT in _seed_rbac().
_SEED_ROLES = [
    ("admin", "系统管理员", "全部权限(含 Schema / 发布 / 用户与权限管理)", True, 10),
    ("operator", "运营审核员", "审核候选、字段级裁决、领治理工单", True, 20),
    ("collector", "采集人员", "领补藏任务、CSV 导入、Python 采集", True, 30),
]
# Capabilities as (cap_key, label, sort_order) tuples.
_SEED_CAPS = [
    ("view_plaza", "查看知识广场", 10),
    ("modify_schema", "修改 Schema", 20),
    ("import_data", "导入数据", 30),
    ("review_candidate", "审核候选", 40),
    ("review_field", "字段级审核", 50),
    ("publish_falkor", "发布到 Falkor", 60),
    ("governance", "治理工作台", 70),
    ("system_settings", "系统设置", 80),
]
# matrix[cap_key] = {role_key: value}
# NOTE(review): most values read as empty strings here. The page this file
# was viewed on warns about ambiguous Unicode characters, so some cells may
# originally hold symbols that did not survive — verify against the raw file.
_SEED_MATRIX = {
    "view_plaza": {"admin": "", "operator": "", "collector": "部分"},
    "modify_schema": {"admin": "", "operator": "", "collector": ""},
    "import_data": {"admin": "", "operator": "", "collector": ""},
    "review_candidate": {"admin": "", "operator": "", "collector": ""},
    "review_field": {"admin": "", "operator": "", "collector": ""},
    "publish_falkor": {"admin": "", "operator": "", "collector": ""},
    "governance": {"admin": "", "operator": "", "collector": "领任务"},
    "system_settings": {"admin": "", "operator": "", "collector": ""},
}
async def _seed_rbac() -> None:
    """Idempotent seed: 3 roles, capability matrix, default admin user.

    Every insert uses ``ON CONFLICT ... DO NOTHING``, so rows that already
    exist are never overwritten. The default admin account (taken from
    ``settings.auth_default_username`` / ``settings.auth_default_password``)
    is created and given the ``admin`` role only when no user with that
    username exists. Everything is committed in one transaction.
    """
    s = settings.db_schema
    async with get_conn() as conn:
        async with conn.cursor() as cur:
            for role_key, label, desc, is_sys, order in _SEED_ROLES:
                await cur.execute(
                    f"""INSERT INTO {s}.rbac_roles (role_key, label, description, is_system, sort_order)
                        VALUES (%s, %s, %s, %s, %s) ON CONFLICT (role_key) DO NOTHING""",
                    (role_key, label, desc, is_sys, order),
                )
            for cap_key, label, order in _SEED_CAPS:
                await cur.execute(
                    f"""INSERT INTO {s}.rbac_capabilities (cap_key, label, sort_order)
                        VALUES (%s, %s, %s) ON CONFLICT (cap_key) DO NOTHING""",
                    (cap_key, label, order),
                )
            for cap_key, role_vals in _SEED_MATRIX.items():
                for role_key, value in role_vals.items():
                    await cur.execute(
                        f"""INSERT INTO {s}.rbac_role_caps (role_key, cap_key, value)
                            VALUES (%s, %s, %s) ON CONFLICT (role_key, cap_key) DO NOTHING""",
                        (role_key, cap_key, value),
                    )
            # default admin account so existing credentials keep working
            await cur.execute(
                f"SELECT id FROM {s}.users WHERE username=%s",
                (settings.auth_default_username,),
            )
            row = await cur.fetchone()
            if not row:
                # The SELECT above is only a fast path that skips password
                # hashing. ON CONFLICT makes the insert itself safe when
                # several workers start at once and race past the SELECT.
                await cur.execute(
                    f"""INSERT INTO {s}.users (username, full_name, hashed_password, status)
                        VALUES (%s, %s, %s, 'active')
                        ON CONFLICT (username) DO NOTHING
                        RETURNING id""",
                    (
                        settings.auth_default_username,
                        "系统管理员",
                        hash_password(settings.auth_default_password),
                    ),
                )
                created = await cur.fetchone()
                # No row comes back when another process won the race; that
                # process is the one that assigns the role.
                if created:
                    await cur.execute(
                        f"""INSERT INTO {s}.user_roles (user_id, role_key) VALUES (%s, 'admin')
                            ON CONFLICT DO NOTHING""",
                        (created["id"],),
                    )
        await conn.commit()
def _ddl_statements() -> list[str]:
    """Return the ordered DDL statements that build the application schema.

    Every statement is qualified with ``settings.db_schema`` and uses an
    ``IF NOT EXISTS`` form (CREATE TABLE / CREATE INDEX / ADD COLUMN), so the
    whole list can be re-applied on every start. Order matters: tables that
    are targets of a REFERENCES clause, or of a later ALTER TABLE / CREATE
    INDEX, appear before the statements that depend on them.
    """
    s = settings.db_schema
    return [
        # ── Existing tables (CREATE IF NOT EXISTS) ──────────────────────────
        f"""CREATE TABLE IF NOT EXISTS {s}.projects (
            project_id TEXT PRIMARY KEY,
            tenant_id TEXT NOT NULL,
            name TEXT NOT NULL,
            status TEXT NOT NULL DEFAULT 'active',
            created_at TIMESTAMPTZ NOT NULL DEFAULT now()
        )""",
        f"""CREATE TABLE IF NOT EXISTS {s}.graph_releases (
            graph_release_id BIGSERIAL PRIMARY KEY,
            project_id TEXT NOT NULL,
            graph_name TEXT NOT NULL,
            status TEXT NOT NULL DEFAULT 'draft',
            created_at TIMESTAMPTZ NOT NULL DEFAULT now()
        )""",
        f"""CREATE TABLE IF NOT EXISTS {s}.import_templates (
            template_id BIGSERIAL PRIMARY KEY,
            tenant_id TEXT NOT NULL,
            project_id TEXT NOT NULL DEFAULT 'default',
            name TEXT NOT NULL,
            version TEXT NOT NULL DEFAULT '1',
            mapping_targets JSONB NOT NULL DEFAULT '[]',
            created_at TIMESTAMPTZ NOT NULL DEFAULT now()
        )""",
        f"""CREATE TABLE IF NOT EXISTS {s}.mapping_profiles (
            profile_id BIGSERIAL PRIMARY KEY,
            template_id BIGINT,
            mappings JSONB NOT NULL DEFAULT '{{}}',
            created_at TIMESTAMPTZ NOT NULL DEFAULT now()
        )""",
        f"""CREATE TABLE IF NOT EXISTS {s}.import_batches (
            batch_id BIGSERIAL PRIMARY KEY,
            tenant_id TEXT NOT NULL,
            project_id TEXT NOT NULL DEFAULT 'default',
            source TEXT,
            source_code TEXT,
            total_rows INTEGER NOT NULL DEFAULT 0,
            success_rows INTEGER NOT NULL DEFAULT 0,
            failed_rows INTEGER NOT NULL DEFAULT 0,
            status TEXT NOT NULL DEFAULT 'pending',
            created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
            completed_at TIMESTAMPTZ
        )""",
        f"""CREATE TABLE IF NOT EXISTS {s}.raw_records (
            id BIGSERIAL PRIMARY KEY,
            batch_id BIGINT REFERENCES {s}.import_batches(batch_id),
            row_number INTEGER NOT NULL,
            raw_jsonb JSONB NOT NULL DEFAULT '{{}}',
            parse_status TEXT NOT NULL DEFAULT 'ok',
            created_at TIMESTAMPTZ NOT NULL DEFAULT now()
        )""",
        f"""CREATE TABLE IF NOT EXISTS {s}.candidate_entities (
            id BIGSERIAL PRIMARY KEY,
            tenant_id TEXT NOT NULL,
            project_id TEXT NOT NULL DEFAULT 'default',
            entity_type TEXT NOT NULL,
            natural_key TEXT NOT NULL,
            payload JSONB NOT NULL DEFAULT '{{}}',
            confidence DOUBLE PRECISION NOT NULL DEFAULT 0.5,
            status TEXT NOT NULL DEFAULT 'pending_review',
            field_provenance_jsonb JSONB NOT NULL DEFAULT '{{}}',
            batch_id BIGINT,
            created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
            updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
        )""",
        f"""CREATE TABLE IF NOT EXISTS {s}.candidate_relations (
            id BIGSERIAL PRIMARY KEY,
            tenant_id TEXT NOT NULL,
            project_id TEXT NOT NULL DEFAULT 'default',
            relation_type TEXT NOT NULL,
            source_natural_key TEXT NOT NULL,
            target_natural_key TEXT NOT NULL,
            payload JSONB NOT NULL DEFAULT '{{}}',
            status TEXT NOT NULL DEFAULT 'pending_review',
            batch_id BIGINT,
            created_at TIMESTAMPTZ NOT NULL DEFAULT now()
        )""",
        f"""CREATE TABLE IF NOT EXISTS {s}.validation_issues (
            id BIGSERIAL PRIMARY KEY,
            candidate_id BIGINT,
            severity TEXT NOT NULL DEFAULT 'warning',
            code TEXT NOT NULL,
            message TEXT NOT NULL,
            created_at TIMESTAMPTZ NOT NULL DEFAULT now()
        )""",
        f"""CREATE TABLE IF NOT EXISTS {s}.review_actions (
            id BIGSERIAL PRIMARY KEY,
            candidate_id BIGINT NOT NULL,
            candidate_type TEXT NOT NULL DEFAULT 'entity',
            action TEXT NOT NULL,
            actor TEXT NOT NULL,
            note TEXT,
            field_decisions_jsonb JSONB NOT NULL DEFAULT '{{}}',
            created_at TIMESTAMPTZ NOT NULL DEFAULT now()
        )""",
        f"""CREATE TABLE IF NOT EXISTS {s}.publish_jobs (
            id BIGSERIAL PRIMARY KEY,
            tenant_id TEXT NOT NULL,
            project_id TEXT NOT NULL DEFAULT 'default',
            candidate_ids JSONB NOT NULL DEFAULT '[]',
            status TEXT NOT NULL DEFAULT 'pending',
            actor TEXT NOT NULL,
            diff_summary_jsonb JSONB NOT NULL DEFAULT '{{}}',
            rollback_target_release_id BIGINT,
            created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
            completed_at TIMESTAMPTZ
        )""",
        f"""CREATE TABLE IF NOT EXISTS {s}.ontology_schemas (
            id BIGSERIAL PRIMARY KEY,
            tenant_id TEXT NOT NULL,
            project_id TEXT NOT NULL DEFAULT 'default',
            version TEXT NOT NULL,
            status TEXT NOT NULL DEFAULT 'draft',
            schema_jsonb JSONB NOT NULL DEFAULT '{{}}',
            created_at TIMESTAMPTZ NOT NULL DEFAULT now()
        )""",
        f"""CREATE TABLE IF NOT EXISTS {s}.ontology_entity_types (
            id BIGSERIAL PRIMARY KEY,
            schema_id BIGINT,
            entity_type TEXT NOT NULL,
            label TEXT,
            fields JSONB NOT NULL DEFAULT '[]',
            created_at TIMESTAMPTZ NOT NULL DEFAULT now()
        )""",
        f"""CREATE TABLE IF NOT EXISTS {s}.ontology_fields (
            id BIGSERIAL PRIMARY KEY,
            entity_type_id BIGINT,
            field_name TEXT NOT NULL,
            value_type TEXT NOT NULL DEFAULT 'string',
            required BOOLEAN NOT NULL DEFAULT FALSE,
            created_at TIMESTAMPTZ NOT NULL DEFAULT now()
        )""",
        f"""CREATE TABLE IF NOT EXISTS {s}.ontology_relation_types (
            id BIGSERIAL PRIMARY KEY,
            schema_id BIGINT,
            relation_type TEXT NOT NULL,
            source_type TEXT,
            target_type TEXT,
            created_at TIMESTAMPTZ NOT NULL DEFAULT now()
        )""",
        f"""CREATE TABLE IF NOT EXISTS {s}.field_proposals (
            id BIGSERIAL PRIMARY KEY,
            tenant_id TEXT NOT NULL,
            project_id TEXT NOT NULL DEFAULT 'default',
            column_name TEXT NOT NULL,
            suggested_entity_type TEXT,
            status TEXT NOT NULL DEFAULT 'pending',
            created_at TIMESTAMPTZ NOT NULL DEFAULT now()
        )""",
        # ── New tables ───────────────────────────────────────────────────────
        f"""CREATE TABLE IF NOT EXISTS {s}.source_profiles (
            id BIGSERIAL PRIMARY KEY,
            tenant_id TEXT NOT NULL,
            project_id TEXT NOT NULL DEFAULT 'default',
            source_code TEXT NOT NULL,
            source_name TEXT NOT NULL,
            source_type TEXT NOT NULL,
            description TEXT,
            api_endpoint TEXT,
            auth_method TEXT,
            update_frequency TEXT,
            authority_level INTEGER NOT NULL DEFAULT 3,
            enabled BOOLEAN NOT NULL DEFAULT TRUE,
            metadata_jsonb JSONB NOT NULL DEFAULT '{{}}',
            created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
            updated_at TIMESTAMPTZ NOT NULL DEFAULT now(),
            UNIQUE (tenant_id, project_id, source_code)
        )""",
        f"""CREATE TABLE IF NOT EXISTS {s}.question_traces (
            id BIGSERIAL PRIMARY KEY,
            tenant_id TEXT NOT NULL,
            project_id TEXT NOT NULL DEFAULT 'default',
            source TEXT NOT NULL,
            origin TEXT,
            user_session TEXT,
            question_text TEXT NOT NULL,
            parsed_intent_jsonb JSONB NOT NULL DEFAULT '{{}}',
            coverage_score DOUBLE PRECISION,
            confidence DOUBLE PRECISION,
            evidence_count INTEGER,
            matched_entity_ids JSONB NOT NULL DEFAULT '[]',
            missing_fields JSONB NOT NULL DEFAULT '[]',
            scenario_tags JSONB NOT NULL DEFAULT '[]',
            suggested_action TEXT,
            acquisition_task_id BIGINT,
            asked_at TIMESTAMPTZ NOT NULL DEFAULT now(),
            evaluated_at TIMESTAMPTZ,
            created_at TIMESTAMPTZ NOT NULL DEFAULT now()
        )""",
        f"""CREATE INDEX IF NOT EXISTS idx_qt_project_source
            ON {s}.question_traces (tenant_id, project_id, source)""",
        f"""CREATE INDEX IF NOT EXISTS idx_qt_evaluated_at
            ON {s}.question_traces (evaluated_at DESC)""",
        f"""CREATE TABLE IF NOT EXISTS {s}.simulation_questions (
            id BIGSERIAL PRIMARY KEY,
            tenant_id TEXT NOT NULL,
            project_id TEXT NOT NULL DEFAULT 'default',
            question_text TEXT NOT NULL,
            scenario_tags JSONB NOT NULL DEFAULT '[]',
            enabled BOOLEAN NOT NULL DEFAULT TRUE,
            created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
            updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
        )""",
        f"""CREATE TABLE IF NOT EXISTS {s}.acquisition_tasks (
            id BIGSERIAL PRIMARY KEY,
            tenant_id TEXT NOT NULL,
            project_id TEXT NOT NULL DEFAULT 'default',
            created_by TEXT NOT NULL,
            triggered_by_trace_id BIGINT,
            title TEXT NOT NULL,
            description TEXT,
            scenario_tags JSONB NOT NULL DEFAULT '[]',
            target_entity_types JSONB NOT NULL DEFAULT '[]',
            target_fields JSONB NOT NULL DEFAULT '[]',
            suggested_collection_method TEXT,
            status TEXT NOT NULL DEFAULT 'pending',
            priority INTEGER NOT NULL DEFAULT 3,
            assignee TEXT,
            due_at TIMESTAMPTZ,
            result_batch_id BIGINT,
            result_summary TEXT,
            created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
            assigned_at TIMESTAMPTZ,
            completed_at TIMESTAMPTZ
        )""",
        f"""CREATE INDEX IF NOT EXISTS idx_acq_status
            ON {s}.acquisition_tasks (tenant_id, project_id, status, priority DESC)""",
        f"""CREATE TABLE IF NOT EXISTS {s}.inventory_issues (
            id BIGSERIAL PRIMARY KEY,
            tenant_id TEXT NOT NULL,
            project_id TEXT NOT NULL DEFAULT 'default',
            issue_type TEXT NOT NULL,
            severity TEXT NOT NULL,
            target_entity_type TEXT,
            target_natural_key TEXT,
            target_field TEXT,
            description TEXT NOT NULL,
            status TEXT NOT NULL DEFAULT 'open',
            resolved_by TEXT,
            resolved_at TIMESTAMPTZ,
            resolution_note TEXT,
            related_acquisition_task_id BIGINT,
            detected_at TIMESTAMPTZ NOT NULL DEFAULT now()
        )""",
        f"""CREATE INDEX IF NOT EXISTS idx_inv_status
            ON {s}.inventory_issues (tenant_id, project_id, status, severity)""",
        f"""CREATE TABLE IF NOT EXISTS {s}.vocabulary_terms (
            id BIGSERIAL PRIMARY KEY,
            tenant_id TEXT NOT NULL,
            project_id TEXT NOT NULL DEFAULT 'default',
            entity_type TEXT NOT NULL,
            canonical_name TEXT NOT NULL,
            aliases JSONB NOT NULL DEFAULT '[]',
            forbidden_aliases JSONB NOT NULL DEFAULT '[]',
            notes TEXT,
            created_by TEXT,
            created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
            updated_at TIMESTAMPTZ NOT NULL DEFAULT now(),
            UNIQUE (tenant_id, project_id, entity_type, canonical_name)
        )""",
        f"""CREATE INDEX IF NOT EXISTS idx_vocab_aliases
            ON {s}.vocabulary_terms USING GIN (aliases)""",
        f"""CREATE TABLE IF NOT EXISTS {s}.agent_call_logs (
            id BIGSERIAL PRIMARY KEY,
            agent_name TEXT NOT NULL,
            project_id TEXT NOT NULL,
            actor TEXT,
            request_id TEXT,
            model TEXT,
            prompt_chars INTEGER,
            response_chars INTEGER,
            latency_ms INTEGER,
            status TEXT NOT NULL,
            error_message TEXT,
            created_at TIMESTAMPTZ NOT NULL DEFAULT now()
        )""",
        # ── RBAC & accounts (P1) ────────────────────────────────────────────
        f"""CREATE TABLE IF NOT EXISTS {s}.rbac_roles (
            role_key TEXT PRIMARY KEY,
            label TEXT NOT NULL,
            description TEXT,
            is_system BOOLEAN NOT NULL DEFAULT FALSE,
            sort_order INTEGER NOT NULL DEFAULT 100,
            created_at TIMESTAMPTZ NOT NULL DEFAULT now()
        )""",
        f"""CREATE TABLE IF NOT EXISTS {s}.rbac_capabilities (
            cap_key TEXT PRIMARY KEY,
            label TEXT NOT NULL,
            sort_order INTEGER NOT NULL DEFAULT 100,
            created_at TIMESTAMPTZ NOT NULL DEFAULT now()
        )""",
        f"""CREATE TABLE IF NOT EXISTS {s}.rbac_role_caps (
            role_key TEXT NOT NULL REFERENCES {s}.rbac_roles(role_key) ON DELETE CASCADE,
            cap_key TEXT NOT NULL REFERENCES {s}.rbac_capabilities(cap_key) ON DELETE CASCADE,
            value TEXT NOT NULL DEFAULT '',
            PRIMARY KEY (role_key, cap_key)
        )""",
        f"""CREATE TABLE IF NOT EXISTS {s}.users (
            id BIGSERIAL PRIMARY KEY,
            username TEXT NOT NULL UNIQUE,
            full_name TEXT,
            phone TEXT,
            hashed_password TEXT NOT NULL,
            status TEXT NOT NULL DEFAULT 'active',
            created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
            updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
        )""",
        f"""CREATE TABLE IF NOT EXISTS {s}.user_roles (
            user_id BIGINT NOT NULL REFERENCES {s}.users(id) ON DELETE CASCADE,
            role_key TEXT NOT NULL REFERENCES {s}.rbac_roles(role_key) ON DELETE CASCADE,
            PRIMARY KEY (user_id, role_key)
        )""",
        # ── City areas & responsibility (P3) ────────────────────────────────
        f"""CREATE TABLE IF NOT EXISTS {s}.areas (
            area_id TEXT PRIMARY KEY,
            name TEXT NOT NULL,
            responsible_user_id BIGINT REFERENCES {s}.users(id) ON DELETE SET NULL,
            note TEXT,
            created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
            updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
        )""",
        f"""ALTER TABLE {s}.acquisition_tasks
            ADD COLUMN IF NOT EXISTS area_id TEXT""",
        f"""ALTER TABLE {s}.simulation_questions
            ADD COLUMN IF NOT EXISTS source TEXT NOT NULL DEFAULT 'manual'""",
        f"""CREATE TABLE IF NOT EXISTS {s}.audit_runs (
            id BIGSERIAL PRIMARY KEY,
            source TEXT NOT NULL DEFAULT 'simulate',
            status TEXT NOT NULL DEFAULT 'running',
            total INTEGER NOT NULL DEFAULT 0,
            done INTEGER NOT NULL DEFAULT 0,
            hits INTEGER NOT NULL DEFAULT 0,
            gaps INTEGER NOT NULL DEFAULT 0,
            low_quality INTEGER NOT NULL DEFAULT 0,
            conflicts INTEGER NOT NULL DEFAULT 0,
            error TEXT,
            started_at TIMESTAMPTZ NOT NULL DEFAULT now(),
            finished_at TIMESTAMPTZ
        )""",
        f"""ALTER TABLE {s}.areas ADD COLUMN IF NOT EXISTS level TEXT""",
        f"""ALTER TABLE {s}.areas ADD COLUMN IF NOT EXISTS parent_id TEXT""",
        f"""CREATE INDEX IF NOT EXISTS idx_areas_parent
            ON {s}.areas (parent_id)""",
        # ── Notifications / in-app inbox (P4) ───────────────────────────────
        f"""CREATE TABLE IF NOT EXISTS {s}.notifications (
            id BIGSERIAL PRIMARY KEY,
            user_id BIGINT NOT NULL REFERENCES {s}.users(id) ON DELETE CASCADE,
            type TEXT NOT NULL DEFAULT 'task',
            title TEXT NOT NULL,
            body TEXT,
            related_task_id BIGINT,
            area_id TEXT,
            is_read BOOLEAN NOT NULL DEFAULT FALSE,
            created_at TIMESTAMPTZ NOT NULL DEFAULT now()
        )""",
        f"""CREATE INDEX IF NOT EXISTS idx_notif_user
            ON {s}.notifications (user_id, is_read, created_at DESC)""",
        # ── Super Agent autonomous runs (guarded loop) ─────────────────────
        f"""CREATE TABLE IF NOT EXISTS {s}.super_agent_runs (
            id BIGSERIAL PRIMARY KEY,
            goal TEXT,
            status TEXT NOT NULL DEFAULT 'running',
            max_steps INTEGER NOT NULL DEFAULT 6,
            budget_pois INTEGER NOT NULL DEFAULT 200,
            step INTEGER NOT NULL DEFAULT 0,
            ingested INTEGER NOT NULL DEFAULT 0,
            stop_requested BOOLEAN NOT NULL DEFAULT FALSE,
            log JSONB NOT NULL DEFAULT '[]',
            error TEXT,
            started_at TIMESTAMPTZ NOT NULL DEFAULT now(),
            finished_at TIMESTAMPTZ
        )""",
        # ── Super Agent work-order ledger (one row per AI action) ───────────
        f"""CREATE TABLE IF NOT EXISTS {s}.super_agent_tasks (
            id BIGSERIAL PRIMARY KEY,
            run_id BIGINT NOT NULL,
            step INTEGER NOT NULL DEFAULT 0,
            category TEXT,
            action TEXT NOT NULL DEFAULT 'ingest',
            plan_reason TEXT,
            tool TEXT,
            fetched INTEGER NOT NULL DEFAULT 0,
            approved INTEGER NOT NULL DEFAULT 0,
            pending INTEGER NOT NULL DEFAULT 0,
            skipped INTEGER NOT NULL DEFAULT 0,
            status TEXT NOT NULL DEFAULT 'done',
            note TEXT,
            related_task_id BIGINT,
            created_at TIMESTAMPTZ NOT NULL DEFAULT now()
        )""",
        f"""CREATE INDEX IF NOT EXISTS idx_sa_tasks_run
            ON {s}.super_agent_tasks (run_id, id)""",
        # ── Gaode (Amap) city-wide grid collection (paging memory + persisted
        #    exhaustion state; the core of saving API quota) ─────────────────
        f"""CREATE TABLE IF NOT EXISTS {s}.gaode_grid_cells (
            id BIGSERIAL PRIMARY KEY,
            cat TEXT NOT NULL,
            typecode TEXT NOT NULL,
            min_lng DOUBLE PRECISION NOT NULL,
            min_lat DOUBLE PRECISION NOT NULL,
            max_lng DOUBLE PRECISION NOT NULL,
            max_lat DOUBLE PRECISION NOT NULL,
            depth INTEGER NOT NULL DEFAULT 0,
            next_page INTEGER NOT NULL DEFAULT 1,
            fetched INTEGER NOT NULL DEFAULT 0,
            status TEXT NOT NULL DEFAULT 'pending',
            updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
        )""",
        f"""CREATE UNIQUE INDEX IF NOT EXISTS uq_grid_cell
            ON {s}.gaode_grid_cells
            (cat, min_lng, min_lat, max_lng, max_lat)""",
        f"""CREATE INDEX IF NOT EXISTS idx_grid_pending
            ON {s}.gaode_grid_cells (cat, status)""",
        # ── Raw evidence layer for social platforms (Xiaohongshu / Dianping /
        #    Douyin unified; crawl once, mine many times) ────────────────────
        f"""CREATE TABLE IF NOT EXISTS {s}.social_evidence (
            id BIGSERIAL PRIMARY KEY,
            platform TEXT NOT NULL,
            kind TEXT NOT NULL DEFAULT 'note',
            source_id TEXT NOT NULL,
            url TEXT,
            entity_name TEXT,
            place_natural_key TEXT,
            keyword TEXT,
            title TEXT,
            content TEXT,
            author TEXT,
            author_id TEXT,
            author_avatar TEXT,
            likes INTEGER NOT NULL DEFAULT 0,
            comments INTEGER NOT NULL DEFAULT 0,
            collects INTEGER NOT NULL DEFAULT 0,
            shares INTEGER NOT NULL DEFAULT 0,
            publish_time TEXT,
            location TEXT,
            tags JSONB NOT NULL DEFAULT '[]',
            image_urls JSONB NOT NULL DEFAULT '[]',
            raw_jsonb JSONB NOT NULL DEFAULT '{{}}',
            captured_at TIMESTAMPTZ NOT NULL DEFAULT now()
        )""",
        f"""CREATE UNIQUE INDEX IF NOT EXISTS uq_social_evidence
            ON {s}.social_evidence (platform, source_id)""",
        f"""CREATE INDEX IF NOT EXISTS idx_social_ev_entity
            ON {s}.social_evidence (platform, place_natural_key)""",
        # ── Agent settings (single-row JSON config) ─────────────────────────
        f"""CREATE TABLE IF NOT EXISTS {s}.agent_settings (
            settings_key TEXT PRIMARY KEY,
            config JSONB NOT NULL DEFAULT '{{}}',
            updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
        )""",
        # ── Work-order timeline: one row per task transition ────────────────
        f"""CREATE TABLE IF NOT EXISTS {s}.task_events (
            id BIGSERIAL PRIMARY KEY,
            task_id BIGINT NOT NULL,
            from_status TEXT,
            to_status TEXT,
            action TEXT NOT NULL,
            actor TEXT,
            note TEXT,
            created_at TIMESTAMPTZ NOT NULL DEFAULT now()
        )""",
        f"""CREATE INDEX IF NOT EXISTS idx_task_events_task
            ON {s}.task_events (task_id, created_at)""",
    ]
# ── Source Profiles ──────────────────────────────────────────────────────────
async def list_source_profiles(tenant_id: str, project_id: str) -> list[dict]:
    """Return every source profile of one tenant/project.

    Rows are ordered by ``authority_level`` (highest first), then by
    ``source_name``.
    """
    schema = settings.db_schema
    query = (
        f"SELECT * FROM {schema}.source_profiles "
        "WHERE tenant_id=%s AND project_id=%s ORDER BY authority_level DESC, source_name"
    )
    async with get_conn() as conn, conn.cursor() as cur:
        await cur.execute(query, (tenant_id, project_id))
        profiles = await cur.fetchall()
    return profiles
async def create_source_profile(data: dict) -> dict:
    """Insert one source profile and return the stored row.

    ``data`` must carry a value for every column named in the INSERT;
    ``metadata_jsonb`` may be omitted and is then stored as ``{}``.
    """
    schema = settings.db_schema
    params = dict(data)
    # JSONB column: bind it as serialized JSON text.
    params["metadata_jsonb"] = json.dumps(data.get("metadata_jsonb", {}))
    insert_sql = f"""INSERT INTO {schema}.source_profiles
        (tenant_id, project_id, source_code, source_name, source_type,
        description, api_endpoint, auth_method, update_frequency,
        authority_level, enabled, metadata_jsonb)
        VALUES (%(tenant_id)s, %(project_id)s, %(source_code)s, %(source_name)s,
        %(source_type)s, %(description)s, %(api_endpoint)s, %(auth_method)s,
        %(update_frequency)s, %(authority_level)s, %(enabled)s,
        %(metadata_jsonb)s)
        RETURNING *"""
    async with get_conn() as conn:
        async with conn.cursor() as cur:
            await cur.execute(insert_sql, params)
            created = await cur.fetchone()
        await conn.commit()
    return created
async def update_source_profile(profile_id: int, data: dict) -> dict | None:
s = settings.db_schema
sets = ", ".join(f"{k}=%({k})s" for k in data)
async with get_conn() as conn:
async with conn.cursor() as cur:
await cur.execute(
f"UPDATE {s}.source_profiles SET {sets}, updated_at=now() WHERE id=%(id)s RETURNING *",
{**data, "id": profile_id},
)
row = await cur.fetchone()
await conn.commit()
return row
# ── Import Batches ───────────────────────────────────────────────────────────
async def list_batches(tenant_id: str, project_id: str, source_code: str | None = None) -> list[dict]:
    """Return up to 100 import batches of a tenant/project, newest first.

    A truthy ``source_code`` narrows the result to batches from that source.
    """
    schema = settings.db_schema
    filters = ["tenant_id=%s", "project_id=%s"]
    args: list = [tenant_id, project_id]
    if source_code:
        filters.append("source_code=%s")
        args.append(source_code)
    query = (
        f"SELECT * FROM {schema}.import_batches WHERE "
        + " AND ".join(filters)
        + " ORDER BY created_at DESC LIMIT 100"
    )
    async with get_conn() as conn, conn.cursor() as cur:
        await cur.execute(query, args)
        batches = await cur.fetchall()
    return batches
async def get_batch(batch_id: int) -> dict | None:
    """Fetch a single import batch by id; ``None`` when no row matches."""
    lookup = f"SELECT * FROM {settings.db_schema}.import_batches WHERE batch_id=%s"
    async with get_conn() as conn, conn.cursor() as cur:
        await cur.execute(lookup, (batch_id,))
        found = await cur.fetchone()
    return found
async def get_batch_raw_records(batch_id: int, limit: int = 100, offset: int = 0) -> list[dict]:
    """Return one page of a batch's raw records, ordered by ``row_number``.

    ``limit`` and ``offset`` are passed straight to the query's LIMIT / OFFSET.
    """
    schema = settings.db_schema
    page_sql = (
        f"SELECT * FROM {schema}.raw_records WHERE batch_id=%s "
        "ORDER BY row_number LIMIT %s OFFSET %s"
    )
    async with get_conn() as conn, conn.cursor() as cur:
        await cur.execute(page_sql, (batch_id, limit, offset))
        records = await cur.fetchall()
    return records
async def get_batch_quality_summary(batch_id: int) -> dict:
    """Summarize a batch: its row counters plus candidate counts per status.

    Returns ``{}`` when the batch does not exist, otherwise
    ``{"batch": <counter row>, "candidates": {status: count}}``.
    """
    schema = settings.db_schema
    batch_sql = (
        f"SELECT total_rows, success_rows, failed_rows, status FROM {schema}.import_batches WHERE batch_id=%s"
    )
    stats_sql = (
        f"SELECT COUNT(*) AS cnt, status FROM {schema}.candidate_entities WHERE batch_id=%s GROUP BY status"
    )
    key = (batch_id,)
    async with get_conn() as conn, conn.cursor() as cur:
        await cur.execute(batch_sql, key)
        batch_row = await cur.fetchone()
        if not batch_row:
            return {}
        await cur.execute(stats_sql, key)
        per_status = {}
        for rec in await cur.fetchall():
            per_status[rec["status"]] = rec["cnt"]
    return {"batch": batch_row, "candidates": per_status}
# ── Question Traces ──────────────────────────────────────────────────────────
async def create_question_trace(data: dict) -> dict:
    """Insert one question trace and return the stored row.

    ``data`` must provide ``tenant_id``, ``project_id`` and ``question_text``.
    Missing optional keys fall back to: ``source="simulated"``,
    ``origin="panel"``, ``user_session=None`` and ``asked_at=None`` (the
    query then substitutes ``now()``). ``parsed_intent_jsonb`` defaults to
    ``{}`` and is bound as serialized JSON.
    """
    schema = settings.db_schema
    params = {
        "source": "simulated",
        "origin": "panel",
        "user_session": None,
        "asked_at": None,
    }
    # Caller-supplied values win over the defaults above.
    params.update(data)
    params["parsed_intent_jsonb"] = json.dumps(data.get("parsed_intent_jsonb", {}))
    insert_sql = f"""INSERT INTO {schema}.question_traces
        (tenant_id, project_id, source, origin, user_session, question_text,
        parsed_intent_jsonb, asked_at)
        VALUES (%(tenant_id)s, %(project_id)s, %(source)s, %(origin)s,
        %(user_session)s, %(question_text)s, %(parsed_intent_jsonb)s,
        COALESCE(%(asked_at)s, now()))
        RETURNING *"""
    async with get_conn() as conn:
        async with conn.cursor() as cur:
            await cur.execute(insert_sql, params)
            created = await cur.fetchone()
        await conn.commit()
    return created
async def list_question_traces(
    tenant_id: str, project_id: str,
    source: str | None = None,
    suggested_action: str | None = None,
    limit: int = 50, offset: int = 0,
) -> list[dict]:
    """Return one page of question traces for a tenant/project, newest first.

    Truthy ``source`` / ``suggested_action`` values add equality filters on
    the columns of the same name.
    """
    schema = settings.db_schema
    filters = ["tenant_id=%s", "project_id=%s"]
    args: list = [tenant_id, project_id]
    for column, wanted in (("source", source), ("suggested_action", suggested_action)):
        if wanted:
            filters.append(f"{column}=%s")
            args.append(wanted)
    args.extend((limit, offset))
    query = (
        f"SELECT * FROM {schema}.question_traces WHERE "
        + " AND ".join(filters)
        + " ORDER BY created_at DESC LIMIT %s OFFSET %s"
    )
    async with get_conn() as conn, conn.cursor() as cur:
        await cur.execute(query, args)
        traces = await cur.fetchall()
    return traces
async def get_question_trace(trace_id: int) -> dict | None:
    """Fetch a single question trace by id; ``None`` when no row matches."""
    lookup = f"SELECT * FROM {settings.db_schema}.question_traces WHERE id=%s"
    async with get_conn() as conn, conn.cursor() as cur:
        await cur.execute(lookup, (trace_id,))
        found = await cur.fetchone()
    return found
async def update_question_trace(trace_id: int, data: dict) -> None:
    """Update the given columns of one question trace.

    Args:
        trace_id: primary key of the trace to change.
        data: mapping of column name -> new value. An empty mapping is a
            no-op.

    Raises:
        ValueError: if a key of ``data`` is not a plain identifier.
    """
    # An empty SET list would be a SQL syntax error; there is nothing to do.
    if not data:
        return
    # Column names are spliced into the SQL text (only values can be bound),
    # so refuse anything that is not a bare identifier before building it.
    invalid = [k for k in data if not str(k).isidentifier()]
    if invalid:
        raise ValueError(f"invalid column name(s): {invalid!r}")
    s = settings.db_schema
    sets = ", ".join(f"{k}=%({k})s" for k in data)
    async with get_conn() as conn:
        async with conn.cursor() as cur:
            await cur.execute(
                f"UPDATE {s}.question_traces SET {sets} WHERE id=%(id)s",
                {**data, "id": trace_id},
            )
        await conn.commit()
# ── Simulation Questions ─────────────────────────────────────────────────────
async def list_simulation_questions(tenant_id: str, project_id: str) -> list[dict]:
    """Return all simulation questions of a tenant/project, newest first."""
    schema = settings.db_schema
    query = (
        f"SELECT * FROM {schema}.simulation_questions "
        "WHERE tenant_id=%s AND project_id=%s ORDER BY created_at DESC"
    )
    async with get_conn() as conn, conn.cursor() as cur:
        await cur.execute(query, (tenant_id, project_id))
        questions = await cur.fetchall()
    return questions
async def create_simulation_question(data: dict) -> dict:
    """Insert one simulation question and return the stored row.

    ``data`` must provide ``tenant_id``, ``project_id`` and
    ``question_text``. ``source`` defaults to ``"manual"``, ``enabled`` to
    ``True`` and ``scenario_tags`` to ``[]`` (bound as serialized JSON).
    """
    schema = settings.db_schema
    params = {"source": "manual", "enabled": True}
    # Caller-supplied values win over the defaults above.
    params.update(data)
    params["scenario_tags"] = json.dumps(data.get("scenario_tags", []))
    insert_sql = f"""INSERT INTO {schema}.simulation_questions
        (tenant_id, project_id, question_text, scenario_tags, enabled, source)
        VALUES (%(tenant_id)s, %(project_id)s, %(question_text)s,
        %(scenario_tags)s, %(enabled)s, %(source)s)
        RETURNING *"""
    async with get_conn() as conn:
        async with conn.cursor() as cur:
            await cur.execute(insert_sql, params)
            created = await cur.fetchone()
        await conn.commit()
    return created
async def bulk_create_simulation_questions(
    tenant_id: str, project_id: str, texts: list[str], source: str
) -> int:
    """Insert many questions at once and return how many rows were added.

    Texts are stripped; blank ones, ones already stored for this
    tenant/project, and repeats within ``texts`` are skipped. New rows get
    empty ``scenario_tags``, ``enabled=TRUE`` and the given ``source``.
    """
    schema = settings.db_schema
    candidates = [
        stripped
        for stripped in (raw.strip() for raw in texts if raw)
        if stripped
    ]
    if not candidates:
        return 0
    existing_sql = f"""SELECT question_text FROM {schema}.simulation_questions
        WHERE tenant_id=%s AND project_id=%s"""
    insert_sql = f"""INSERT INTO {schema}.simulation_questions
        (tenant_id, project_id, question_text, scenario_tags, enabled, source)
        VALUES (%s, %s, %s, '[]', TRUE, %s)"""
    async with get_conn() as conn:
        async with conn.cursor() as cur:
            await cur.execute(existing_sql, (tenant_id, project_id))
            seen = {row["question_text"] for row in await cur.fetchall()}
            inserted = 0
            for text in candidates:
                if text not in seen:
                    # Remember it so a repeat later in this call is skipped.
                    seen.add(text)
                    await cur.execute(
                        insert_sql, (tenant_id, project_id, text, source)
                    )
                    inserted += 1
        await conn.commit()
    return inserted
async def adopt_hit_question_traces(tenant_id: str, project_id: str) -> int:
    """Copy questions whose trace has ``suggested_action='hit'`` into the
    simulation question set with ``source='resolved'``.

    Returns the number of questions actually added; de-duplication is done
    by ``bulk_create_simulation_questions``.
    """
    schema = settings.db_schema
    hit_sql = f"""SELECT DISTINCT question_text FROM {schema}.question_traces
        WHERE tenant_id=%s AND project_id=%s
        AND suggested_action='hit' AND question_text IS NOT NULL"""
    async with get_conn() as conn, conn.cursor() as cur:
        await cur.execute(hit_sql, (tenant_id, project_id))
        hit_rows = await cur.fetchall()
    answered = [row["question_text"] for row in hit_rows]
    return await bulk_create_simulation_questions(
        tenant_id, project_id, answered, "resolved"
    )
async def update_simulation_question(q_id: int, data: dict) -> dict | None:
s = settings.db_schema
if "scenario_tags" in data:
data["scenario_tags"] = json.dumps(data["scenario_tags"])
sets = ", ".join(f"{k}=%({k})s" for k in data)
async with get_conn() as conn:
async with conn.cursor() as cur:
await cur.execute(
f"UPDATE {s}.simulation_questions SET {sets}, updated_at=now() WHERE id=%(id)s RETURNING *",
{**data, "id": q_id},
)
row = await cur.fetchone()
await conn.commit()
return row
async def delete_simulation_question(q_id: int) -> None:
    """Delete the simulation question with the given id and commit."""
    delete_sql = f"DELETE FROM {settings.db_schema}.simulation_questions WHERE id=%s"
    async with get_conn() as conn:
        async with conn.cursor() as cur:
            await cur.execute(delete_sql, (q_id,))
        await conn.commit()
# ── Candidate Entities ───────────────────────────────────────────────────────
async def get_candidate_entity(entity_id: int) -> dict | None:
    """Fetch a single candidate entity by id; ``None`` when no row matches."""
    lookup = f"SELECT * FROM {settings.db_schema}.candidate_entities WHERE id=%s"
    async with get_conn() as conn, conn.cursor() as cur:
        await cur.execute(lookup, (entity_id,))
        found = await cur.fetchone()
    return found
async def list_candidate_entities(
    tenant_id: str, project_id: str,
    status: str | None = None,
    entity_type: str | None = None,
    limit: int = 50, offset: int = 0,
) -> list[dict]:
    """Return one page of candidate entities for a tenant/project, newest first.

    Truthy ``status`` / ``entity_type`` values add equality filters on the
    columns of the same name.
    """
    schema = settings.db_schema
    filters = ["tenant_id=%s", "project_id=%s"]
    args: list = [tenant_id, project_id]
    for column, wanted in (("status", status), ("entity_type", entity_type)):
        if wanted:
            filters.append(f"{column}=%s")
            args.append(wanted)
    args.extend((limit, offset))
    query = (
        f"SELECT * FROM {schema}.candidate_entities WHERE "
        + " AND ".join(filters)
        + " ORDER BY created_at DESC LIMIT %s OFFSET %s"
    )
    async with get_conn() as conn, conn.cursor() as cur:
        await cur.execute(query, args)
        entities = await cur.fetchall()
    return entities
async def update_candidate_entity(entity_id: int, data: dict) -> None:
    """Update the given columns of one candidate entity.

    Args:
        entity_id: primary key of the candidate to change.
        data: mapping of column name -> new value. When it is empty only
            ``updated_at`` is refreshed.

    Raises:
        ValueError: if a key of ``data`` is not a plain identifier.
    """
    # Column names are spliced into the SQL text (only values can be bound),
    # so refuse anything that is not a bare identifier before building it.
    invalid = [k for k in data if not str(k).isidentifier()]
    if invalid:
        raise ValueError(f"invalid column name(s): {invalid!r}")
    s = settings.db_schema
    # Joining updated_at together with the caller's columns keeps the SET
    # list well-formed even when ``data`` is empty.
    sets = ", ".join([*(f"{k}=%({k})s" for k in data), "updated_at=now()"])
    async with get_conn() as conn:
        async with conn.cursor() as cur:
            await cur.execute(
                f"UPDATE {s}.candidate_entities SET {sets} WHERE id=%(id)s",
                {**data, "id": entity_id},
            )
        await conn.commit()
async def create_review_action(data: dict) -> dict:
    """Insert one review action and return the stored row.

    ``data`` must provide ``candidate_id``, ``candidate_type``, ``action``,
    ``actor`` and ``note``; ``field_decisions_jsonb`` defaults to ``{}`` and
    is bound as serialized JSON.
    """
    schema = settings.db_schema
    params = dict(data)
    params["field_decisions_jsonb"] = json.dumps(data.get("field_decisions_jsonb", {}))
    insert_sql = f"""INSERT INTO {schema}.review_actions
        (candidate_id, candidate_type, action, actor, note, field_decisions_jsonb)
        VALUES (%(candidate_id)s, %(candidate_type)s, %(action)s, %(actor)s,
        %(note)s, %(field_decisions_jsonb)s)
        RETURNING *"""
    async with get_conn() as conn:
        async with conn.cursor() as cur:
            await cur.execute(insert_sql, params)
            created = await cur.fetchone()
        await conn.commit()
    return created
async def list_review_actions(candidate_id: int) -> list[dict]:
    """Return all review actions recorded for one candidate, newest first."""
    schema = settings.db_schema
    query = f"SELECT * FROM {schema}.review_actions WHERE candidate_id=%s ORDER BY created_at DESC"
    async with get_conn() as conn, conn.cursor() as cur:
        await cur.execute(query, (candidate_id,))
        actions = await cur.fetchall()
    return actions
# ── Acquisition Tasks ────────────────────────────────────────────────────────
async def list_acquisition_tasks(
    tenant_id: str, project_id: str,
    status: str | None = None,
    limit: int = 50, offset: int = 0,
) -> list[dict]:
    """Return one page of acquisition tasks for a tenant/project.

    Rows are ordered by ``priority`` descending, then newest first. A truthy
    ``status`` restricts the result to tasks in that status.
    """
    schema = settings.db_schema
    filters = ["tenant_id=%s", "project_id=%s"]
    args: list = [tenant_id, project_id]
    if status:
        filters.append("status=%s")
        args.append(status)
    args.extend((limit, offset))
    query = (
        f"SELECT * FROM {schema}.acquisition_tasks WHERE "
        + " AND ".join(filters)
        + " ORDER BY priority DESC, created_at DESC LIMIT %s OFFSET %s"
    )
    async with get_conn() as conn, conn.cursor() as cur:
        await cur.execute(query, args)
        tasks = await cur.fetchall()
    return tasks
async def create_acquisition_task(data: dict) -> dict:
    """Insert one acquisition task and return the stored row.

    ``data`` must provide ``tenant_id``, ``project_id``, ``created_by``,
    ``title`` and ``priority``. ``triggered_by_trace_id``, ``description``
    and ``suggested_collection_method`` default to ``None``; the three list
    columns default to ``[]`` and are bound as serialized JSON.
    """
    schema = settings.db_schema
    params = {
        "triggered_by_trace_id": None,
        "description": None,
        "suggested_collection_method": None,
    }
    # Caller-supplied values win over the defaults above.
    params.update(data)
    for json_column in ("scenario_tags", "target_entity_types", "target_fields"):
        params[json_column] = json.dumps(data.get(json_column, []))
    insert_sql = f"""INSERT INTO {schema}.acquisition_tasks
        (tenant_id, project_id, created_by, triggered_by_trace_id, title,
        description, scenario_tags, target_entity_types, target_fields,
        suggested_collection_method, priority)
        VALUES (%(tenant_id)s, %(project_id)s, %(created_by)s,
        %(triggered_by_trace_id)s, %(title)s, %(description)s,
        %(scenario_tags)s, %(target_entity_types)s, %(target_fields)s,
        %(suggested_collection_method)s, %(priority)s)
        RETURNING *"""
    async with get_conn() as conn:
        async with conn.cursor() as cur:
            await cur.execute(insert_sql, params)
            created = await cur.fetchone()
        await conn.commit()
    return created
async def update_acquisition_task(task_id: int, data: dict) -> dict | None:
    """Update selected columns of one acquisition task.

    Args:
        task_id: Primary key of the task.
        data: Mapping of column name to new value. Keys are interpolated into
            the statement as column names (only the values are bound as
            parameters), so they must come from trusted code. List or dict
            values for ``scenario_tags``, ``target_entity_types`` and
            ``target_fields`` are JSON-encoded, mirroring
            ``create_acquisition_task``.

    Returns:
        The row after the update, or ``None`` when no task has that id. With
        an empty ``data`` the current row is returned unchanged.
    """
    s = settings.db_schema
    json_columns = ("scenario_tags", "target_entity_types", "target_fields")
    # Build a fresh dict so the caller's mapping is left untouched.
    params = {
        k: json.dumps(v) if k in json_columns and isinstance(v, (list, dict)) else v
        for k, v in data.items()
    }
    async with get_conn() as conn:
        async with conn.cursor() as cur:
            if params:
                sets = ", ".join(f"{k}=%({k})s" for k in params)
                await cur.execute(
                    f"UPDATE {s}.acquisition_tasks SET {sets} WHERE id=%(id)s RETURNING *",
                    {**params, "id": task_id},
                )
            else:
                # An empty SET list is a SQL syntax error; nothing to change,
                # so just read the row back.
                await cur.execute(
                    f"SELECT * FROM {s}.acquisition_tasks WHERE id=%s",
                    (task_id,),
                )
            row = await cur.fetchone()
        await conn.commit()
    return row
async def get_acquisition_task(task_id: int) -> dict | None:
    """Load a single acquisition task by primary key.

    Args:
        task_id: Value matched against ``acquisition_tasks.id``.

    Returns:
        The matching row, or ``None`` when there is none.
    """
    query = f"SELECT * FROM {settings.db_schema}.acquisition_tasks WHERE id=%s"
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(query, (task_id,))
        task = await cursor.fetchone()
    return task
# ── Work-order timeline (task_events) ────────────────────────────────────────
async def log_task_event(
    task_id: int, action: str, from_status: str | None,
    to_status: str | None, actor: str | None, note: str | None = None,
) -> None:
    """Append one entry to the ``task_events`` timeline and commit it.

    Args:
        task_id: Task the event belongs to.
        action: Name of the action that was performed.
        from_status: Status before the action, if any.
        to_status: Status after the action, if any.
        actor: Who performed the action, if known.
        note: Optional free-text remark stored with the event.
    """
    insert_sql = f"""INSERT INTO {settings.db_schema}.task_events
        (task_id, from_status, to_status, action, actor, note)
        VALUES (%s, %s, %s, %s, %s, %s)"""
    # Tuple order follows the column list above, not the parameter order.
    event = (task_id, from_status, to_status, action, actor, note)
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(insert_sql, event)
        await conn.commit()
async def list_task_events(task_id: int) -> list[dict]:
    """Return the timeline of one task in chronological order.

    Args:
        task_id: Value matched against ``task_events.task_id``.

    Returns:
        Event rows ordered by ``created_at`` and then ``id``.
    """
    query = f"""SELECT * FROM {settings.db_schema}.task_events
        WHERE task_id=%s ORDER BY created_at, id"""
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(query, (task_id,))
        events = await cursor.fetchall()
    return events
# ── Inventory Issues ─────────────────────────────────────────────────────────
async def list_inventory_issues(
    tenant_id: str, project_id: str,
    status: str | None = None,
    severity: str | None = None,
    limit: int = 100,
) -> list[dict]:
    """List inventory issues of one tenant/project, most recently detected first.

    Args:
        tenant_id: Tenant the issues must belong to.
        project_id: Project the issues must belong to.
        status: When truthy, restrict to this exact status.
        severity: When truthy, restrict to this exact severity.
        limit: Maximum number of rows returned.

    Returns:
        Matching rows ordered by ``detected_at`` descending.
    """
    schema = settings.db_schema
    query = f"SELECT * FROM {schema}.inventory_issues WHERE tenant_id=%s AND project_id=%s"
    args: list = [tenant_id, project_id]
    # Optional equality filters, applied in a fixed order.
    optional_filters = ((" AND status=%s", status), (" AND severity=%s", severity))
    for clause, value in optional_filters:
        if value:
            query += clause
            args.append(value)
    query += " ORDER BY detected_at DESC LIMIT %s"
    args.append(limit)
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(query, args)
        issues = await cursor.fetchall()
    return issues
async def create_inventory_issue(data: dict) -> dict:
    """Insert one inventory issue and return the stored row.

    Args:
        data: Bound directly as the named parameters of the INSERT, so it
            must provide ``tenant_id``, ``project_id``, ``issue_type``,
            ``severity``, ``target_entity_type``, ``target_natural_key``,
            ``target_field`` and ``description``.

    Returns:
        The inserted row as produced by ``RETURNING *``.
    """
    insert_sql = f"""INSERT INTO {settings.db_schema}.inventory_issues
        (tenant_id, project_id, issue_type, severity, target_entity_type,
        target_natural_key, target_field, description)
        VALUES (%(tenant_id)s, %(project_id)s, %(issue_type)s, %(severity)s,
        %(target_entity_type)s, %(target_natural_key)s,
        %(target_field)s, %(description)s)
        RETURNING *"""
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(insert_sql, data)
        issue = await cursor.fetchone()
        await conn.commit()
    return issue
async def resolve_inventory_issue(issue_id: int, resolved_by: str, note: str | None) -> dict | None:
    """Mark an inventory issue as resolved.

    Sets ``status='resolved'``, stamps ``resolved_at`` with ``now()`` and
    records who resolved it and the resolution note.

    Args:
        issue_id: Primary key of the issue.
        resolved_by: Stored in ``resolved_by``.
        note: Stored in ``resolution_note``; may be ``None``.

    Returns:
        The updated row, or ``None`` when no issue has that id.
    """
    update_sql = (
        f"UPDATE {settings.db_schema}.inventory_issues SET status='resolved', resolved_by=%s, "
        "resolved_at=now(), resolution_note=%s WHERE id=%s RETURNING *"
    )
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(update_sql, (resolved_by, note, issue_id))
        resolved = await cursor.fetchone()
        await conn.commit()
    return resolved
# ── Vocabulary Terms ─────────────────────────────────────────────────────────
async def list_vocabulary_terms(
    tenant_id: str, project_id: str,
    entity_type: str | None = None,
    search: str | None = None,
    limit: int = 100,
) -> list[dict]:
    """List vocabulary terms of one tenant/project, optionally filtered.

    Args:
        tenant_id: Tenant the terms must belong to.
        project_id: Project the terms must belong to.
        entity_type: When truthy, restrict to this exact entity type.
        search: When truthy, keep terms whose ``canonical_name`` or the text
            form of ``aliases`` contains this text (case-insensitive). The
            text is matched literally: ``%``, ``_`` and ``\\`` are escaped so
            they do not act as LIKE wildcards.
        limit: Maximum number of rows returned.

    Returns:
        Matching rows ordered by ``entity_type`` then ``canonical_name``.
    """
    s = settings.db_schema
    q = f"SELECT * FROM {s}.vocabulary_terms WHERE tenant_id=%s AND project_id=%s"
    params: list = [tenant_id, project_id]
    if entity_type:
        q += " AND entity_type=%s"
        params.append(entity_type)
    if search:
        # Backslash is PostgreSQL's default LIKE escape character, so it has
        # to be doubled first, before it is used to escape % and _.
        literal = (
            search.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
        )
        pattern = f"%{literal}%"
        q += " AND (canonical_name ILIKE %s OR aliases::text ILIKE %s)"
        params += [pattern, pattern]
    q += " ORDER BY entity_type, canonical_name LIMIT %s"
    params.append(limit)
    async with get_conn() as conn:
        async with conn.cursor() as cur:
            await cur.execute(q, params)
            return await cur.fetchall()
async def create_vocabulary_term(data: dict) -> dict:
    """Insert one vocabulary term and return the stored row.

    Args:
        data: Must provide ``tenant_id``, ``project_id``, ``entity_type``,
            ``canonical_name``, ``notes`` and ``created_by``. ``aliases`` and
            ``forbidden_aliases`` default to an empty list and are
            JSON-encoded before being bound.

    Returns:
        The inserted row as produced by ``RETURNING *``.
    """
    values = dict(data)
    for column in ("aliases", "forbidden_aliases"):
        values[column] = json.dumps(data.get(column, []))
    insert_sql = f"""INSERT INTO {settings.db_schema}.vocabulary_terms
        (tenant_id, project_id, entity_type, canonical_name, aliases,
        forbidden_aliases, notes, created_by)
        VALUES (%(tenant_id)s, %(project_id)s, %(entity_type)s, %(canonical_name)s,
        %(aliases)s, %(forbidden_aliases)s, %(notes)s, %(created_by)s)
        RETURNING *"""
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(insert_sql, values)
        term = await cursor.fetchone()
        await conn.commit()
    return term
async def update_vocabulary_term(term_id: int, data: dict) -> dict | None:
    """Update selected columns of one vocabulary term and bump ``updated_at``.

    Args:
        term_id: Primary key of the term.
        data: Mapping of column name to new value. Keys are interpolated into
            the statement as column names (only values are bound as
            parameters), so they must come from trusted code. ``aliases`` and
            ``forbidden_aliases`` are JSON-encoded. The mapping itself is
            never modified.

    Returns:
        The row after the update, or ``None`` when no term has that id. With
        an empty ``data`` only ``updated_at`` is refreshed.
    """
    s = settings.db_schema
    # Copy first: JSON-encoding in place would rewrite the caller's dict.
    params = dict(data)
    for key in ("aliases", "forbidden_aliases"):
        if key in params:
            params[key] = json.dumps(params[key])
    # Building the list keeps the SET clause valid even when data is empty.
    assignments = [f"{k}=%({k})s" for k in params]
    assignments.append("updated_at=now()")
    sets = ", ".join(assignments)
    async with get_conn() as conn:
        async with conn.cursor() as cur:
            await cur.execute(
                f"UPDATE {s}.vocabulary_terms SET {sets} WHERE id=%(id)s RETURNING *",
                {**params, "id": term_id},
            )
            row = await cur.fetchone()
        await conn.commit()
    return row
async def lookup_vocabulary(tenant_id: str, project_id: str, name: str) -> dict | None:
    """Find a vocabulary term by canonical name or by alias.

    A term matches when ``canonical_name`` equals ``name`` or when its
    ``aliases`` JSON value contains ``name`` (``@>`` containment against a
    one-element JSON array).

    Args:
        tenant_id: Tenant the term must belong to.
        project_id: Project the term must belong to.
        name: Name to look up.

    Returns:
        The first row the query yields, or ``None`` when nothing matches.
    """
    query = (
        f"SELECT * FROM {settings.db_schema}.vocabulary_terms WHERE tenant_id=%s AND project_id=%s "
        "AND (canonical_name=%s OR aliases @> %s::jsonb)"
    )
    alias_probe = json.dumps([name])
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(query, (tenant_id, project_id, name, alias_probe))
        term = await cursor.fetchone()
    return term
# ── Publish Jobs ─────────────────────────────────────────────────────────────
async def get_publish_job(job_id: int) -> dict | None:
    """Load a single publish job by primary key.

    Args:
        job_id: Value matched against ``publish_jobs.id``.

    Returns:
        The matching row, or ``None`` when there is none.
    """
    query = f"SELECT * FROM {settings.db_schema}.publish_jobs WHERE id=%s"
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(query, (job_id,))
        job = await cursor.fetchone()
    return job
async def list_publish_jobs(tenant_id: str, project_id: str, limit: int = 50) -> list[dict]:
    """List the most recently created publish jobs of one tenant/project.

    Args:
        tenant_id: Tenant the jobs must belong to.
        project_id: Project the jobs must belong to.
        limit: Maximum number of rows returned.

    Returns:
        Rows ordered by ``created_at`` descending.
    """
    query = (
        f"SELECT * FROM {settings.db_schema}.publish_jobs "
        "WHERE tenant_id=%s AND project_id=%s ORDER BY created_at DESC LIMIT %s"
    )
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(query, (tenant_id, project_id, limit))
        jobs = await cursor.fetchall()
    return jobs
async def update_publish_job(job_id: int, data: dict) -> dict | None:
    """Update selected columns of one publish job.

    Args:
        job_id: Primary key of the job.
        data: Mapping of column name to new value. Keys are interpolated into
            the statement as column names (only values are bound as
            parameters), so they must come from trusted code.

    Returns:
        The row after the update, or ``None`` when no job has that id. With
        an empty ``data`` the current row is returned unchanged.
    """
    s = settings.db_schema
    async with get_conn() as conn:
        async with conn.cursor() as cur:
            if data:
                sets = ", ".join(f"{k}=%({k})s" for k in data)
                await cur.execute(
                    f"UPDATE {s}.publish_jobs SET {sets} WHERE id=%(id)s RETURNING *",
                    {**data, "id": job_id},
                )
            else:
                # An empty SET list is a SQL syntax error; nothing to change,
                # so just read the row back.
                await cur.execute(
                    f"SELECT * FROM {s}.publish_jobs WHERE id=%s", (job_id,)
                )
            row = await cur.fetchone()
        await conn.commit()
    return row
# ── Plaza / Stats ────────────────────────────────────────────────────────────
def _build_plaza_categories(et_cnt: dict, place_rows: list[dict]) -> list[dict]:
    """Fold accessioned counts into display categories, largest total first.

    Args:
        et_cnt: Accessioned row count per ``entity_type``.
        place_rows: Rows for ``entity_type='Place'`` with keys ``sub``
            (sub-type), ``is_station`` and ``c`` (count).

    Returns:
        A list of ``{"key", "label", "total", "sub"}`` dicts. Every
        non-``Place`` entity type with a positive count is one category.
        Place rows with a station type are grouped under one ``transit``
        category (with per-sub-type counts in ``sub``); every other Place
        sub-type becomes its own ``place:<sub>`` category.
    """
    et_label = {"Area": "行政区", "Source": "来源", "Evidence": "证据",
                "ExperienceTag": "体验标签", "RouteTemplate": "线路模板",
                "BusLine": "公交线路"}
    sub_label = {"地铁站": "地铁", "公交站": "公交", "eat": "美食",
                 "drink": "饮品", "walk": "漫步", "transit_stop": "公交",
                 "sight": "景点", "hotel": "酒店", "mall": "商场",
                 "poi": "其他地点"}
    # Simple entity types: one category each (Place is broken down below).
    categories: list[dict] = [
        {"key": et, "label": et_label.get(et, et), "total": c, "sub": []}
        for et, c in et_cnt.items()
        if et != "Place" and c > 0
    ]
    transit_sub: list[dict] = []
    transit_total = 0
    for r in place_rows:
        lbl = sub_label.get(r["sub"], r["sub"])
        if r["is_station"]:
            transit_sub.append({"label": lbl, "count": r["c"]})
            transit_total += r["c"]
        else:
            categories.append({"key": f"place:{r['sub']}", "label": lbl,
                               "total": r["c"], "sub": []})
    if transit_total > 0:
        transit_sub.sort(key=lambda x: x["count"], reverse=True)
        categories.append({"key": "transit", "label": "公共交通",
                           "total": transit_total, "sub": transit_sub})
    # list.sort is stable, so equal totals keep their insertion order.
    categories.sort(key=lambda x: x["total"], reverse=True)
    return categories


async def get_plaza_overview(tenant_id: str, project_id: str) -> dict:
    """Collect the headline statistics shown on the knowledge plaza.

    Runs six queries on one connection: accessioned count, counts by status,
    counts by entity type, accessioned counts by entity type, accessioned
    Place sub-type counts, and the ten most recently updated graph releases.

    Args:
        tenant_id: Tenant whose data is summarised.
        project_id: Project whose data is summarised.

    Returns:
        A dict with keys ``accessioned_entities``, ``pending_review``,
        ``by_status``, ``by_type``, ``categories`` and ``graph_releases``.
        "Accessioned" means ``status IN ('approved','published')``.
    """
    s = settings.db_schema
    scope = (tenant_id, project_id)
    acc = "status IN ('approved','published')"
    async with get_conn() as conn:
        async with conn.cursor() as cur:
            # Total accessioned (approved + published)
            await cur.execute(
                f"SELECT COUNT(*) AS cnt FROM {s}.candidate_entities "
                "WHERE tenant_id=%s AND project_id=%s AND status IN ('approved','published')",
                scope,
            )
            accessioned = (await cur.fetchone())["cnt"]

            # By status
            await cur.execute(
                f"SELECT status, COUNT(*) AS cnt FROM {s}.candidate_entities "
                "WHERE tenant_id=%s AND project_id=%s GROUP BY status",
                scope,
            )
            by_status = {r["status"]: r["cnt"] for r in await cur.fetchall()}

            # By entity type (all statuses)
            await cur.execute(
                f"SELECT entity_type, COUNT(*) AS cnt FROM {s}.candidate_entities "
                "WHERE tenant_id=%s AND project_id=%s GROUP BY entity_type ORDER BY cnt DESC",
                scope,
            )
            by_type = [
                {"entity_type": r["entity_type"], "cnt": r["cnt"]}
                for r in await cur.fetchall()
            ]

            # entity_type counts (accessioned only)
            await cur.execute(
                f"SELECT entity_type, COUNT(*) c FROM {s}.candidate_entities "
                f"WHERE tenant_id=%s AND project_id=%s AND {acc} GROUP BY entity_type",
                scope,
            )
            et_cnt = {r["entity_type"]: r["c"] for r in await cur.fetchall()}

            # Place sub-types: station_type first, else place_type
            await cur.execute(
                f"""SELECT COALESCE(payload_jsonb->>'station_type',
                    payload_jsonb->>'place_type', '未分类') AS sub,
                    (payload_jsonb->>'station_type') IS NOT NULL AS is_station,
                    COUNT(*) c
                    FROM {s}.candidate_entities
                    WHERE tenant_id=%s AND project_id=%s AND entity_type='Place' AND {acc}
                    GROUP BY 1, 2""",
                scope,
            )
            place_rows = await cur.fetchall()

            # Graph releases
            await cur.execute(
                f"SELECT id, graph_name, alias, status, "
                f"published_at, activated_at, updated_at FROM {s}.graph_releases "
                "WHERE tenant_id=%s AND project_id=%s "
                f"ORDER BY updated_at DESC LIMIT 10",
                scope,
            )
            releases = await cur.fetchall()

    return {
        "accessioned_entities": accessioned,
        "pending_review": by_status.get("pending_review", 0),
        "by_status": by_status,
        "by_type": by_type,
        "categories": _build_plaza_categories(et_cnt, place_rows),
        "graph_releases": [
            {
                "id": r["id"],
                "graph_name": r["graph_name"],
                "label": r["alias"],
                "status": r["status"],
                # NOTE(review): str() turns a NULL timestamp into the literal
                # text "None" — confirm consumers expect that.
                "published_at": str(r["published_at"]),
                "updated_at": str(r["updated_at"]),
            }
            for r in releases
        ],
    }
async def get_audit_gaps(tenant_id: str, project_id: str) -> list[dict]:
    """Return question traces flagged as gaps that have no task yet.

    Selects traces with ``suggested_action='gap'`` and a NULL
    ``acquisition_task_id``.

    Args:
        tenant_id: Tenant the traces must belong to.
        project_id: Project the traces must belong to.

    Returns:
        At most 50 rows, ordered by ``evaluated_at`` descending.
    """
    query = (
        f"SELECT * FROM {settings.db_schema}.question_traces "
        "WHERE tenant_id=%s AND project_id=%s AND suggested_action='gap' "
        "AND acquisition_task_id IS NULL "
        "ORDER BY evaluated_at DESC LIMIT 50"
    )
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(query, (tenant_id, project_id))
        gaps = await cursor.fetchall()
    return gaps
async def get_plaza_alerts(tenant_id: str, project_id: str) -> list[dict]:
    """Return the open inventory issues to surface as alerts.

    Args:
        tenant_id: Tenant the issues must belong to.
        project_id: Project the issues must belong to.

    Returns:
        At most 20 rows with ``status='open'``, ordered ``blocker`` first,
        then ``warning``, then any other severity; newest ``detected_at``
        first within each group.
    """
    query = (
        f"SELECT * FROM {settings.db_schema}.inventory_issues "
        "WHERE tenant_id=%s AND project_id=%s AND status='open' "
        "ORDER BY CASE severity WHEN 'blocker' THEN 1 WHEN 'warning' THEN 2 ELSE 3 END, "
        "detected_at DESC LIMIT 20"
    )
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(query, (tenant_id, project_id))
        alerts = await cursor.fetchall()
    return alerts
# ── Agent Call Logs ──────────────────────────────────────────────────────────
async def log_agent_call(data: dict) -> None:
    """Record one agent invocation in ``agent_call_logs`` and commit it.

    Args:
        data: Bound directly as the named parameters of the INSERT, so it
            must provide ``agent_name``, ``project_id``, ``actor``,
            ``request_id``, ``model``, ``prompt_chars``, ``response_chars``,
            ``latency_ms``, ``status`` and ``error_message``.
    """
    insert_sql = f"""INSERT INTO {settings.db_schema}.agent_call_logs
        (agent_name, project_id, actor, request_id, model,
        prompt_chars, response_chars, latency_ms, status, error_message)
        VALUES (%(agent_name)s, %(project_id)s, %(actor)s, %(request_id)s,
        %(model)s, %(prompt_chars)s, %(response_chars)s,
        %(latency_ms)s, %(status)s, %(error_message)s)"""
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(insert_sql, data)
        await conn.commit()
# ── RBAC: roles ──────────────────────────────────────────────────────────────
async def list_roles() -> list[dict]:
    """Return every RBAC role, ordered by ``sort_order`` then ``role_key``."""
    query = f"SELECT * FROM {settings.db_schema}.rbac_roles ORDER BY sort_order, role_key"
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(query)
        roles = await cursor.fetchall()
    return roles
async def create_role(data: dict) -> dict:
    """Insert a non-system RBAC role and return the stored row.

    Args:
        data: Must provide ``role_key`` and ``label``. ``description``
            defaults to NULL; a missing or NULL ``sort_order`` becomes 100.

    Returns:
        The inserted row; ``is_system`` is always stored as FALSE.
    """
    values = {"sort_order": None, "description": None}
    values.update(data)
    insert_sql = f"""INSERT INTO {settings.db_schema}.rbac_roles (role_key, label, description, is_system, sort_order)
        VALUES (%(role_key)s, %(label)s, %(description)s, FALSE,
        COALESCE(%(sort_order)s, 100)) RETURNING *"""
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(insert_sql, values)
        role = await cursor.fetchone()
        await conn.commit()
    return role
async def update_role(role_key: str, data: dict) -> dict | None:
    """Update selected columns of one RBAC role.

    Args:
        role_key: Key of the role to update.
        data: Mapping of column name to new value. Keys are interpolated
            into the statement as column names; only values are bound.

    Returns:
        The updated row, or ``None`` when ``data`` is empty or no role has
        that key.
    """
    if not data:
        return None
    assignments = ", ".join(f"{column}=%({column})s" for column in data)
    update_sql = (
        f"UPDATE {settings.db_schema}.rbac_roles SET {assignments} WHERE role_key=%(role_key)s RETURNING *"
    )
    bound = dict(data)
    bound["role_key"] = role_key
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(update_sql, bound)
        role = await cursor.fetchone()
        await conn.commit()
    return role
async def delete_role(role_key: str) -> bool:
    """Delete a custom RBAC role.

    Args:
        role_key: Key of the role to delete.

    Returns:
        ``True`` after the role was deleted, ``False`` when no role has
        that key.

    Raises:
        ValueError: If the role is flagged ``is_system``.
    """
    schema = settings.db_schema
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(
            f"SELECT is_system FROM {schema}.rbac_roles WHERE role_key=%s", (role_key,)
        )
        existing = await cursor.fetchone()
        if not existing:
            return False
        # Built-in roles are protected from deletion.
        if existing["is_system"]:
            raise ValueError("系统内置角色不可删除")
        await cursor.execute(f"DELETE FROM {schema}.rbac_roles WHERE role_key=%s", (role_key,))
        await conn.commit()
    return True
# ── RBAC: capabilities & matrix ──────────────────────────────────────────────
async def list_capabilities() -> list[dict]:
    """Return every RBAC capability, ordered by ``sort_order`` then ``cap_key``."""
    query = f"SELECT * FROM {settings.db_schema}.rbac_capabilities ORDER BY sort_order, cap_key"
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(query)
        capabilities = await cursor.fetchall()
    return capabilities
async def create_capability(data: dict) -> dict:
    """Insert one RBAC capability and return the stored row.

    Args:
        data: Must provide ``cap_key`` and ``label``; a missing or NULL
            ``sort_order`` becomes 100.

    Returns:
        The inserted row as produced by ``RETURNING *``.
    """
    values = {"sort_order": None}
    values.update(data)
    insert_sql = f"""INSERT INTO {settings.db_schema}.rbac_capabilities (cap_key, label, sort_order)
        VALUES (%(cap_key)s, %(label)s, COALESCE(%(sort_order)s, 100))
        RETURNING *"""
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(insert_sql, values)
        capability = await cursor.fetchone()
        await conn.commit()
    return capability
async def delete_capability(cap_key: str) -> bool:
    """Delete one RBAC capability.

    Args:
        cap_key: Key of the capability to delete.

    Returns:
        ``True`` when a row was deleted, ``False`` when no capability has
        that key (the same convention ``delete_role`` uses for a missing
        row).
    """
    s = settings.db_schema
    async with get_conn() as conn:
        async with conn.cursor() as cur:
            await cur.execute(
                f"DELETE FROM {s}.rbac_capabilities WHERE cap_key=%s", (cap_key,)
            )
            # rowcount is the number of rows the DELETE affected.
            deleted = cur.rowcount > 0
        await conn.commit()
    return deleted
async def get_permission_matrix() -> dict:
    """Load everything the editable permission grid needs.

    Returns:
        A dict with ``roles`` (all role rows), ``capabilities`` (all
        capability rows) and ``matrix``, where
        ``matrix[cap_key][role_key]`` is the stored cell value. Capabilities
        without any stored cell have no entry in ``matrix``.
    """
    schema = settings.db_schema
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(
            f"SELECT * FROM {schema}.rbac_roles ORDER BY sort_order, role_key"
        )
        roles = await cursor.fetchall()
        await cursor.execute(
            f"SELECT * FROM {schema}.rbac_capabilities ORDER BY sort_order, cap_key"
        )
        capabilities = await cursor.fetchall()
        await cursor.execute(f"SELECT role_key, cap_key, value FROM {schema}.rbac_role_caps")
        cells = await cursor.fetchall()

    # Pivot the flat (role, cap, value) cells into cap -> role -> value.
    matrix: dict[str, dict[str, str]] = {}
    for cell in cells:
        per_role = matrix.get(cell["cap_key"])
        if per_role is None:
            per_role = matrix[cell["cap_key"]] = {}
        per_role[cell["role_key"]] = cell["value"]
    return {"roles": roles, "capabilities": capabilities, "matrix": matrix}
async def set_role_cap(role_key: str, cap_key: str, value: str) -> None:
    """Store one cell of the permission matrix (insert or overwrite).

    Args:
        role_key: Role side of the cell.
        cap_key: Capability side of the cell.
        value: Value to store; replaces any existing value for the pair.
    """
    upsert_sql = f"""INSERT INTO {settings.db_schema}.rbac_role_caps (role_key, cap_key, value)
        VALUES (%s, %s, %s)
        ON CONFLICT (role_key, cap_key) DO UPDATE SET value=EXCLUDED.value"""
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(upsert_sql, (role_key, cap_key, value))
        await conn.commit()
# ── Accounts: users ──────────────────────────────────────────────────────────
async def list_users() -> list[dict]:
    """List all users with their roles and the areas they are responsible for.

    Returns:
        One row per user, ordered by ``created_at``. Besides the basic
        account columns (no password hash), each row has ``roles`` (role
        keys), ``area_names`` and ``area_ids`` (both ordered by area id);
        each is an empty array when the user has none.
    """
    schema = settings.db_schema
    # '{{}}' renders as the empty-array literal '{}' inside the f-string.
    query = f"""SELECT u.id, u.username, u.full_name, u.phone, u.status,
        u.created_at, u.updated_at,
        COALESCE((SELECT array_agg(ur.role_key)
        FROM {schema}.user_roles ur
        WHERE ur.user_id=u.id), '{{}}') AS roles,
        COALESCE((SELECT array_agg(a.name ORDER BY a.area_id)
        FROM {schema}.areas a
        WHERE a.responsible_user_id=u.id), '{{}}') AS area_names,
        COALESCE((SELECT array_agg(a.area_id ORDER BY a.area_id)
        FROM {schema}.areas a
        WHERE a.responsible_user_id=u.id), '{{}}') AS area_ids
        FROM {schema}.users u
        ORDER BY u.created_at"""
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(query)
        users = await cursor.fetchall()
    return users
async def get_user_auth(username: str) -> dict | None:
    """Load the record needed to authenticate a user.

    Args:
        username: Exact username to look up.

    Returns:
        A row with ``id``, ``username``, ``full_name``, ``hashed_password``,
        ``status`` and ``roles`` (role keys, empty array when the user has
        none), or ``None`` when the username is unknown.
    """
    schema = settings.db_schema
    query = f"""SELECT u.id, u.username, u.full_name, u.hashed_password, u.status,
        COALESCE(array_agg(ur.role_key)
        FILTER (WHERE ur.role_key IS NOT NULL), '{{}}') AS roles
        FROM {schema}.users u
        LEFT JOIN {schema}.user_roles ur ON ur.user_id = u.id
        WHERE u.username=%s
        GROUP BY u.id"""
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(query, (username,))
        account = await cursor.fetchone()
    return account
async def create_user(data: dict, role_keys: list[str]) -> dict:
    """Insert a user and attach the given roles, then commit.

    Args:
        data: Must provide ``username`` and ``hashed_password``.
            ``full_name`` and ``phone`` default to NULL; a missing or NULL
            ``status`` becomes ``'active'``.
        role_keys: Role keys linked to the new user through ``user_roles``
            (already existing links are skipped).

    Returns:
        The inserted row (``id``, ``username``, ``full_name``, ``phone``,
        ``status``, ``created_at``) with an added ``roles`` entry holding
        ``role_keys`` exactly as passed in.
    """
    schema = settings.db_schema
    user_values = {"full_name": None, "phone": None, "status": None}
    user_values.update(data)
    link_sql = f"""INSERT INTO {schema}.user_roles (user_id, role_key) VALUES (%s, %s)
        ON CONFLICT DO NOTHING"""
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(
            f"""INSERT INTO {schema}.users (username, full_name, phone, hashed_password, status)
            VALUES (%(username)s, %(full_name)s, %(phone)s, %(hashed_password)s,
            COALESCE(%(status)s, 'active'))
            RETURNING id, username, full_name, phone, status, created_at""",
            user_values,
        )
        user = await cursor.fetchone()
        for role_key in role_keys:
            await cursor.execute(link_sql, (user["id"], role_key))
        await conn.commit()
    user["roles"] = role_keys
    return user
async def update_user(user_id: int, data: dict, role_keys: list[str] | None) -> dict | None:
    """Update a user's columns and/or replace their role set.

    Args:
        user_id: Primary key of the user.
        data: Mapping of column name to new value; skipped when empty. Keys
            are interpolated into the statement as column names; only values
            are bound. ``updated_at`` is bumped whenever ``data`` is applied.
        role_keys: ``None`` leaves the roles untouched; a list (even an
            empty one) replaces all existing role links with exactly these.

    Returns:
        The user's row read back after the changes (``id``, ``username``,
        ``full_name``, ``phone``, ``status``, ``roles``), or ``None`` when
        no user has that id.
    """
    schema = settings.db_schema
    read_back_sql = f"""SELECT u.id, u.username, u.full_name, u.phone, u.status,
        COALESCE(array_agg(ur.role_key)
        FILTER (WHERE ur.role_key IS NOT NULL), '{{}}') AS roles
        FROM {schema}.users u LEFT JOIN {schema}.user_roles ur ON ur.user_id=u.id
        WHERE u.id=%s GROUP BY u.id"""
    async with get_conn() as conn, conn.cursor() as cursor:
        if data:
            assignments = ", ".join(f"{column}=%({column})s" for column in data)
            await cursor.execute(
                f"UPDATE {schema}.users SET {assignments}, updated_at=now() WHERE id=%(id)s",
                {**data, "id": user_id},
            )
        if role_keys is not None:
            # Replace semantics: drop every link, then re-create the new set.
            await cursor.execute(
                f"DELETE FROM {schema}.user_roles WHERE user_id=%s", (user_id,)
            )
            for role_key in role_keys:
                await cursor.execute(
                    f"""INSERT INTO {schema}.user_roles (user_id, role_key) VALUES (%s, %s)
                    ON CONFLICT DO NOTHING""",
                    (user_id, role_key),
                )
        await cursor.execute(read_back_sql, (user_id,))
        user = await cursor.fetchone()
        await conn.commit()
    return user
async def delete_user(user_id: int) -> bool:
    """Delete one user.

    Args:
        user_id: Primary key of the user to delete.

    Returns:
        ``True`` when a row was deleted, ``False`` when no user has that id.
    """
    s = settings.db_schema
    async with get_conn() as conn:
        async with conn.cursor() as cur:
            await cur.execute(f"DELETE FROM {s}.users WHERE id=%s", (user_id,))
            # rowcount is the number of rows the DELETE affected.
            deleted = cur.rowcount > 0
        await conn.commit()
    return deleted
# ── City areas & responsibility (P3) ─────────────────────────────────────────
async def list_areas() -> list[dict]:
    """List all areas together with their responsible user, if any.

    Returns:
        One row per area ordered by ``area_id``. ``responsible_username``
        and ``responsible_name`` come from a LEFT JOIN on ``users`` and are
        NULL when no user is linked.
    """
    schema = settings.db_schema
    query = f"""SELECT a.area_id, a.name, a.level, a.parent_id,
        a.responsible_user_id, a.note,
        u.username AS responsible_username,
        u.full_name AS responsible_name,
        a.updated_at
        FROM {schema}.areas a
        LEFT JOIN {schema}.users u ON u.id = a.responsible_user_id
        ORDER BY a.area_id"""
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(query)
        areas = await cursor.fetchall()
    return areas
async def upsert_area(
    area_id: str, name: str,
    level: str | None = None, parent_id: str | None = None,
) -> None:
    """Create an area or refresh its descriptive columns (sync-from-graph).

    On conflict only ``name``, ``level``, ``parent_id`` and ``updated_at``
    are rewritten, so an existing ``responsible_user_id`` is left as it is.

    Args:
        area_id: Primary key of the area.
        name: Display name.
        level: Optional level label.
        parent_id: Optional id of the parent area.
    """
    upsert_sql = f"""INSERT INTO {settings.db_schema}.areas (area_id, name, level, parent_id)
        VALUES (%s, %s, %s, %s)
        ON CONFLICT (area_id) DO UPDATE
        SET name=EXCLUDED.name, level=EXCLUDED.level,
        parent_id=EXCLUDED.parent_id, updated_at=now()"""
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(upsert_sql, (area_id, name, level, parent_id))
        await conn.commit()
async def upsert_custom_area(name: str) -> str:
    """Create (or re-save) a free-text area typed in by an admin.

    The id is ``"c_"`` plus the first 10 hex characters of the MD5 of the
    stripped name, so the same name always maps to the same id. MD5 serves
    only as a stable fingerprint here.

    Args:
        name: Area name; surrounding whitespace is stripped.

    Returns:
        The derived area id.
    """
    import hashlib

    cleaned = name.strip()
    digest = hashlib.md5(cleaned.encode()).hexdigest()
    area_id = "c_" + digest[:10]
    upsert_sql = f"""INSERT INTO {settings.db_schema}.areas (area_id, name, level)
        VALUES (%s, %s, 'custom')
        ON CONFLICT (area_id) DO UPDATE SET name=EXCLUDED.name"""
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(upsert_sql, (area_id, cleaned))
        await conn.commit()
    return area_id
async def list_user_areas(user_id: int) -> list[dict]:
    """List the areas a user is responsible for.

    Args:
        user_id: Value matched against ``areas.responsible_user_id``.

    Returns:
        Rows with ``area_id``, ``name`` and ``level``, ordered by ``area_id``.
    """
    query = f"""SELECT area_id, name, level FROM {settings.db_schema}.areas
        WHERE responsible_user_id=%s ORDER BY area_id"""
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(query, (user_id,))
        areas = await cursor.fetchall()
    return areas
async def set_user_areas(user_id: int, area_ids: list[str]) -> None:
    """Make the user responsible for exactly ``area_ids``.

    Areas the user currently owns that are not in ``area_ids`` are released
    (``responsible_user_id`` set to NULL); every listed area is then
    assigned to the user, replacing any previous owner.

    Args:
        user_id: User who becomes responsible.
        area_ids: Complete list of areas the user should own afterwards.
    """
    schema = settings.db_schema
    release_sql = f"""UPDATE {schema}.areas SET responsible_user_id=NULL, updated_at=now()
        WHERE responsible_user_id=%s AND NOT (area_id = ANY(%s))"""
    assign_sql = f"""UPDATE {schema}.areas SET responsible_user_id=%s, updated_at=now()
        WHERE area_id=%s"""
    # An empty selection is sent as [""], which matches no listed area, so
    # every area the user owns is released.
    keep = area_ids or [""]
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(release_sql, (user_id, keep))
        for area_id in area_ids:
            await cursor.execute(assign_sql, (user_id, area_id))
        await conn.commit()
async def update_area(area_id: str, data: dict) -> dict | None:
    """Update selected columns of one area and bump ``updated_at``.

    Args:
        area_id: Primary key of the area.
        data: Mapping of column name to new value. Keys are interpolated
            into the statement as column names; only values are bound.

    Returns:
        The updated row, or ``None`` when ``data`` is empty or no area has
        that id.
    """
    if not data:
        return None
    assignments = ", ".join(f"{column}=%({column})s" for column in data)
    update_sql = (
        f"UPDATE {settings.db_schema}.areas SET {assignments}, updated_at=now() "
        f"WHERE area_id=%(area_id)s RETURNING *"
    )
    bound = dict(data)
    bound["area_id"] = area_id
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(update_sql, bound)
        area = await cursor.fetchone()
        await conn.commit()
    return area
async def get_area_responsible(area_id: str) -> dict | None:
    """Resolve the active user responsible for an area (used for gap routing).

    Args:
        area_id: Area to look up.

    Returns:
        A row with ``id``, ``username`` and ``full_name``, or ``None`` when
        the area has no responsible user or that user's status is not
        ``'active'``.
    """
    schema = settings.db_schema
    query = f"""SELECT u.id, u.username, u.full_name
        FROM {schema}.areas a
        JOIN {schema}.users u ON u.id = a.responsible_user_id
        WHERE a.area_id=%s AND u.status='active'"""
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(query, (area_id,))
        responsible = await cursor.fetchone()
    return responsible
# ── Gap routing + notifications (P4) ─────────────────────────────────────────
async def get_user_id_by_username(username: str) -> int | None:
    """Translate a username into the user's primary key.

    Args:
        username: Exact username to look up.

    Returns:
        The user's ``id``, or ``None`` when the username is unknown.
    """
    query = f"SELECT id FROM {settings.db_schema}.users WHERE username=%s"
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(query, (username,))
        found = await cursor.fetchone()
    if found:
        return found["id"]
    return None
async def resolve_area_from_entities(entity_ids: list) -> str | None:
"""Most common area_id among the matched candidate entities' payloads."""
if not entity_ids:
return None
s = settings.db_schema
ids = [int(x) for x in entity_ids if str(x).isdigit()]
if not ids:
return None
async with get_conn() as conn:
async with conn.cursor() as cur:
await cur.execute(
f"SELECT payload->>'area_id' AS area_id "
f"FROM {s}.candidate_entities WHERE id = ANY(%s)",
(ids,),
)
rows = await cur.fetchall()
from collections import Counter
vals = [r["area_id"] for r in rows if r.get("area_id")]
return Counter(vals).most_common(1)[0][0] if vals else None
async def set_task_routing(task_id: int, area_id: str | None, assignee: str) -> None:
    """Assign an acquisition task to a person and record its area.

    Sets ``area_id`` and ``assignee``, moves the task to
    ``status='assigned'`` and stamps ``assigned_at`` with ``now()``.

    Args:
        task_id: Primary key of the task.
        area_id: Area the task was routed through; may be ``None``.
        assignee: Value stored in ``assignee``.
    """
    update_sql = f"""UPDATE {settings.db_schema}.acquisition_tasks
        SET area_id=%s, assignee=%s, status='assigned', assigned_at=now()
        WHERE id=%s"""
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(update_sql, (area_id, assignee, task_id))
        await conn.commit()
async def create_notification(
    user_id: int, title: str, body: str | None = None,
    ntype: str = "task", related_task_id: int | None = None,
    area_id: str | None = None,
) -> dict:
    """Insert one notification for a user and return the stored row.

    Args:
        user_id: Recipient of the notification.
        title: Notification title.
        body: Optional longer text.
        ntype: Stored in the ``type`` column; defaults to ``"task"``.
        related_task_id: Optional task the notification refers to.
        area_id: Optional area the notification refers to.

    Returns:
        The inserted row as produced by ``RETURNING *``.
    """
    insert_sql = f"""INSERT INTO {settings.db_schema}.notifications
        (user_id, type, title, body, related_task_id, area_id)
        VALUES (%s, %s, %s, %s, %s, %s) RETURNING *"""
    # Tuple order follows the column list above, not the parameter order.
    values = (user_id, ntype, title, body, related_task_id, area_id)
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(insert_sql, values)
        notification = await cursor.fetchone()
        await conn.commit()
    return notification
async def list_notifications(user_id: int, only_unread: bool = False) -> list[dict]:
    """List a user's most recent notifications.

    Args:
        user_id: Recipient whose notifications are listed.
        only_unread: When true, only rows with ``is_read=FALSE`` are returned.

    Returns:
        At most 100 rows, ordered by ``created_at`` descending.
    """
    unread_filter = " AND is_read=FALSE" if only_unread else ""
    query = (
        f"SELECT * FROM {settings.db_schema}.notifications WHERE user_id=%s"
        + unread_filter
        + " ORDER BY created_at DESC LIMIT 100"
    )
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(query, (user_id,))
        notifications = await cursor.fetchall()
    return notifications
async def unread_notification_count(user_id: int) -> int:
    """Count a user's unread notifications.

    Args:
        user_id: Recipient whose notifications are counted.

    Returns:
        Number of rows with ``is_read=FALSE`` (0 if the query yields no row).
    """
    query = (
        f"SELECT count(*) AS c FROM {settings.db_schema}.notifications "
        f"WHERE user_id=%s AND is_read=FALSE"
    )
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(query, (user_id,))
        counted = await cursor.fetchone()
    if not counted:
        return 0
    return counted["c"]
async def mark_notification_read(notif_id: int, user_id: int) -> bool:
    """Mark one notification as read, provided it belongs to the user.

    Args:
        notif_id: Primary key of the notification.
        user_id: Owner the notification must belong to.

    Returns:
        ``True`` when a notification with that id and owner exists (whether
        or not it was already read), ``False`` when nothing matched.
    """
    s = settings.db_schema
    async with get_conn() as conn:
        async with conn.cursor() as cur:
            await cur.execute(
                f"UPDATE {s}.notifications SET is_read=TRUE "
                f"WHERE id=%s AND user_id=%s",
                (notif_id, user_id),
            )
            # rowcount is the number of rows the UPDATE matched.
            matched = cur.rowcount > 0
        await conn.commit()
    return matched
async def mark_all_notifications_read(user_id: int) -> int:
    """Mark every unread notification of a user as read.

    Args:
        user_id: Recipient whose notifications are updated.

    Returns:
        The cursor's ``rowcount`` after the UPDATE, i.e. how many
        notifications were switched to read.
    """
    update_sql = (
        f"UPDATE {settings.db_schema}.notifications SET is_read=TRUE "
        f"WHERE user_id=%s AND is_read=FALSE"
    )
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(update_sql, (user_id,))
        changed = cursor.rowcount
        await conn.commit()
    return changed
# ── Agent settings (LLM + per-sub-agent config) ──────────────────────────────
def _default_agent(label: str, enabled: bool = True) -> dict:
    """Build the default configuration for one sub-agent.

    Args:
        label: Display label of the agent.
        enabled: Whether the agent starts enabled.

    Returns:
        A new dict. Blank ``base_url`` / ``model`` / ``api_key`` mean
        "inherit the global value"; multimodal support is off and
        ``multimodal_model`` (e.g. a vision model for POI photos) is blank.
    """
    return dict(
        label=label,
        enabled=enabled,
        base_url="",
        model="",
        api_key="",
        multimodal_enabled=False,
        multimodal_model="",
    )
# Built-in agent configuration; get_agent_settings() overlays the stored
# settings on top of it.
DEFAULT_AGENT_CONFIG: dict = {
    "global": {"base_url": "", "model": "deepseek-chat", "api_key": "", "timeout": 30},
    "thresholds": {
        "audit_hit": 0.7,
        "audit_gap": 0.4,
        "aligner_auto_merge": 0.95,
    },
    "agents": {
        "auditor": _default_agent("AI 质量稽查"),
        "aligner": _default_agent("实体归一"),
        "extraction": _default_agent("值归一", enabled=False),
        "field_mapping": _default_agent("字段映射"),
        "schema_lint": _default_agent("Schema 审计"),
        "publisher": _default_agent("图谱发布"),
        "web_agent": _default_agent("联网采集 web_agent"),
        "xhs_agent": _default_agent("小红书采集 xhs_agent"),
        "douyin_agent": _default_agent("抖音采集 douyin_agent"),
    },
    # Multi-model distillation quality gate: the same fact is put to N models
    # concurrently and their answers are adjudicated together.
    # Consistent and plausible -> auto-ingest / clearly wrong -> discard /
    # disagreement -> hand over to a human (core idea: minimise manual work).
    "distill": {
        "enabled": True,
        "policy": "auto_high_conf",  # auto-ingest at high confidence, route disagreements to a human
        "timeout": 45,
        "models": {
            "openai": {"label": "OpenAI", "enabled": False,
                       "base_url": "https://api.openai.com/v1",
                       "api_key": "", "model": "gpt-4o-mini"},
            "deepseek": {"label": "DeepSeek", "enabled": False,
                         "base_url": "https://api.deepseek.com/v1",
                         "api_key": "", "model": "deepseek-chat"},
            "doubao": {"label": "火山豆包", "enabled": False,
                       "base_url": "https://ark.cn-beijing.volces.com/api/v3",
                       "api_key": "", "model": ""},
            "qwen": {"label": "千问 Qwen", "enabled": False,
                     "base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
                     "api_key": "", "model": "qwen-plus"},
            "anthropic": {"label": "Anthropic", "enabled": False,
                          "base_url": "https://api.apiyi.com/v1",
                          "api_key": "", "model": "claude-3-7-sonnet-20250219"},
        },
    },
    # Multi-model knowledge extraction (the web/event path uses this):
    # 3 extractors + 1 decider, with its own API pool (separate from distill).
    # The main agent (opus/global) does no extraction itself, it only
    # orchestrates; extraction runs on the independent models configured here
    # so that one unpaid account cannot take everything down.
    # User-controlled: every enabled model acts as an "extractor"; the decider
    # (aggregator) is chosen as a single key.
    # The decider is automatically excluded from the extractor list to avoid
    # voting bias.
    "extract": {
        "enabled": True,
        "timeout": 90,
        # Extraction pool: its own API configuration (separate from distill.models)
        "models": {
            "openai": {"label": "OpenAI", "enabled": False,
                       "base_url": "https://api.openai.com/v1",
                       "api_key": "", "model": "gpt-4o-mini",
                       "max_tokens": None},
            "deepseek": {"label": "DeepSeek", "enabled": True,
                         "base_url": "https://api.deepseek.com/v1",
                         "api_key": "", "model": "deepseek-chat",
                         "max_tokens": None},
            "doubao": {"label": "火山豆包", "enabled": True,
                       "base_url": "https://ark.cn-beijing.volces.com/api/v3",
                       "api_key": "", "model": "",
                       "max_tokens": None},
            "qwen": {"label": "千问 Qwen", "enabled": True,
                     "base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
                     "api_key": "", "model": "qwen-plus",
                     "max_tokens": None},
            "anthropic": {"label": "Anthropic", "enabled": False,
                          "base_url": "https://api.apiyi.com/v1",
                          "api_key": "", "model": "claude-3-7-sonnet-20250219",
                          "max_tokens": None},
        },
        # Decider (single key): pick one from the extraction pool; default
        # deepseek (fast and stable)
        "aggregator": "deepseek",
    },
}
def _merge_keyed_defaults(defaults: dict, saved: dict | None) -> dict:
    """Overlay saved per-key dicts on their defaults, keeping extra saved keys.

    Args:
        defaults: Mapping of key to default dict.
        saved: Mapping of key to saved dict; ``None`` is treated as empty.

    Returns:
        A new mapping with every default key first (saved fields override
        default fields), followed by keys that exist only in ``saved``.
    """
    saved = saved or {}
    merged = {
        key: {**dft, **(saved.get(key) or {})} for key, dft in defaults.items()
    }
    for key, val in saved.items():
        merged.setdefault(key, val)
    return merged


async def get_agent_settings() -> dict:
    """Load the agent settings, overlaid on ``DEFAULT_AGENT_CONFIG``.

    Reads the ``config`` of the ``settings_key='agent'`` row. Saved values
    win; defaults fill in whatever the saved config lacks, so newly added
    keys, agents and models always appear. A section stored as ``null`` is
    treated like a missing section.

    Returns:
        A fresh dict on every call. When nothing is stored this is a deep
        copy of the defaults, so callers may modify the result without
        altering ``DEFAULT_AGENT_CONFIG``.
    """
    import copy

    s = settings.db_schema
    async with get_conn() as conn:
        async with conn.cursor() as cur:
            await cur.execute(
                f"SELECT config FROM {s}.agent_settings WHERE settings_key='agent'"
            )
            row = await cur.fetchone()

    # Work on a private copy so the module-level defaults are never handed out.
    defaults = copy.deepcopy(DEFAULT_AGENT_CONFIG)
    if not row or not row.get("config"):
        return defaults

    cfg = row["config"]
    # Shallow-merge over defaults so newly added top-level keys still appear.
    merged = {**defaults, **cfg}
    merged["global"] = {**defaults["global"], **(cfg.get("global") or {})}
    merged["thresholds"] = {**defaults["thresholds"], **(cfg.get("thresholds") or {})}
    # Deep-merge each agent so newly added per-agent fields always appear.
    merged["agents"] = _merge_keyed_defaults(defaults["agents"], cfg.get("agents"))

    # Distill gate: deep-merge the model pool so newly added models surface.
    saved_d = cfg.get("distill") or {}
    dm = {**defaults["distill"], **saved_d}
    dm["models"] = _merge_keyed_defaults(
        defaults["distill"]["models"], saved_d.get("models")
    )
    merged["distill"] = dm

    # Extract (multi-model knowledge extraction, separate API pool).
    saved_e = cfg.get("extract") or {}
    em = {**defaults["extract"], **saved_e}
    em["models"] = _merge_keyed_defaults(
        defaults["extract"]["models"], saved_e.get("models")
    )
    # Migration shim for the old field name `extractors` -> `models`
    # (read-only; it is never written back).
    legacy = saved_e.get("extractors")
    if isinstance(legacy, dict):
        for mk, role in legacy.items():
            if mk in em["models"]:
                # Old configs only carried an enabled flag; fold it into the
                # new model entry's `enabled`.
                em["models"][mk]["enabled"] = bool(
                    (role or {}).get("enabled",
                                     em["models"][mk].get("enabled", False)))
    merged["extract"] = em
    return merged
async def save_agent_settings(config: dict) -> dict:
    """Persist the agent settings under ``settings_key='agent'``.

    Inserts the row or overwrites the existing one, stamping ``updated_at``
    with ``now()``.

    Args:
        config: Settings to store; serialised with ``json.dumps``.

    Returns:
        The same ``config`` object that was passed in.
    """
    upsert_sql = f"""INSERT INTO {settings.db_schema}.agent_settings (settings_key, config, updated_at)
        VALUES ('agent', %s, now())
        ON CONFLICT (settings_key)
        DO UPDATE SET config=EXCLUDED.config, updated_at=now()"""
    payload = json.dumps(config)
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(upsert_sql, (payload,))
        await conn.commit()
    return config
# ── Audit runs (async progress tracking) ─────────────────────────────────────
# Maps an audit action to the audit_runs counter column it increments.
_ACTION_COL: dict[str, str] = dict(
    hit="hits",
    gap="gaps",
    low_quality="low_quality",
    conflict="conflicts",
)
async def create_audit_run(source: str, total: int) -> int:
    """Open a new audit run in status ``'running'``.

    Args:
        source: Stored in the ``source`` column.
        total: Stored in the ``total`` column.

    Returns:
        The id of the new ``audit_runs`` row.
    """
    insert_sql = f"""INSERT INTO {settings.db_schema}.audit_runs (source, total, status)
        VALUES (%s, %s, 'running') RETURNING id"""
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(insert_sql, (source, total))
        created = await cursor.fetchone()
        run_id = created["id"]
        await conn.commit()
    return run_id
async def bump_audit_run(run_id: int, action: str) -> None:
    """Advance the progress counters of an audit run by one item.

    ``done`` is always incremented. When ``action`` is a key of
    ``_ACTION_COL`` the matching counter column is incremented too; any
    other action only bumps ``done``.

    Args:
        run_id: Primary key of the audit run.
        action: Outcome of the processed item.
    """
    assignments = "done=done+1"
    # The column name comes from the fixed _ACTION_COL table, never directly
    # from the caller's string.
    counter = _ACTION_COL.get(action)
    if counter:
        assignments += f", {counter}={counter}+1"
    update_sql = f"UPDATE {settings.db_schema}.audit_runs SET {assignments} WHERE id=%s"
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(update_sql, (run_id,))
        await conn.commit()
async def finish_audit_run(run_id: int, status: str = "done",
                           error: str | None = None) -> None:
    """Close an audit run, stamping ``finished_at`` with ``now()``.

    Args:
        run_id: Primary key of the audit run.
        status: Final status to store; defaults to ``"done"``.
        error: Optional error text stored in the ``error`` column.
    """
    update_sql = f"""UPDATE {settings.db_schema}.audit_runs
        SET status=%s, error=%s, finished_at=now() WHERE id=%s"""
    async with get_conn() as conn, conn.cursor() as cursor:
        await cursor.execute(update_sql, (status, error, run_id))
        await conn.commit()
async def get_audit_run(run_id: int) -> dict | None:
    """Return the ``audit_runs`` row with id *run_id*, or ``None`` if absent."""
    schema = settings.db_schema
    query = f"SELECT * FROM {schema}.audit_runs WHERE id=%s"
    async with get_conn() as conn, conn.cursor() as cur:
        await cur.execute(query, (run_id,))
        row = await cur.fetchone()
    return row
async def get_latest_audit_run() -> dict | None:
    """Return the ``audit_runs`` row with the highest id, or ``None`` if empty."""
    schema = settings.db_schema
    query = f"SELECT * FROM {schema}.audit_runs ORDER BY id DESC LIMIT 1"
    async with get_conn() as conn, conn.cursor() as cur:
        await cur.execute(query)
        newest = await cur.fetchone()
    return newest
# ── Super Agent runs (guarded autonomous loop) ───────────────────────────────
async def sa_create_run(goal: str, max_steps: int, budget_pois: int) -> int:
    """Insert a new ``super_agent_runs`` row and return its generated id.

    Only ``goal``, ``max_steps`` and ``budget_pois`` are supplied here; all
    other columns are left to whatever the table defines.
    """
    schema = settings.db_schema
    insert_sql = f"""INSERT INTO {schema}.super_agent_runs (goal, max_steps, budget_pois)
                     VALUES (%s,%s,%s) RETURNING id"""
    async with get_conn() as conn:
        async with conn.cursor() as cur:
            await cur.execute(insert_sql, (goal, max_steps, budget_pois))
            created = await cur.fetchone()
            new_id = created["id"]
        await conn.commit()
    return new_id
async def sa_get_run(run_id: int) -> dict | None:
    """Return the ``super_agent_runs`` row with id *run_id*, or ``None``."""
    schema = settings.db_schema
    query = f"SELECT * FROM {schema}.super_agent_runs WHERE id=%s"
    async with get_conn() as conn, conn.cursor() as cur:
        await cur.execute(query, (run_id,))
        row = await cur.fetchone()
    return row
async def sa_latest_run() -> dict | None:
    """Return the ``super_agent_runs`` row with the highest id, or ``None``."""
    schema = settings.db_schema
    query = f"SELECT * FROM {schema}.super_agent_runs ORDER BY id DESC LIMIT 1"
    async with get_conn() as conn, conn.cursor() as cur:
        await cur.execute(query)
        newest = await cur.fetchone()
    return newest
async def sa_append_step(run_id: int, step_obj: dict, ingested_delta: int = 0) -> None:
    """Record one executed step on a Super Agent run.

    In a single UPDATE: *step_obj* is concatenated onto the run's ``log``
    (sent as a one-element JSON array), ``step`` is incremented by one and
    ``ingested`` grows by *ingested_delta*.
    """
    schema = settings.db_schema
    append_sql = f"""UPDATE {schema}.super_agent_runs
                     SET log = log || %s::jsonb,
                     step = step + 1,
                     ingested = ingested + %s
                     WHERE id=%s"""
    async with get_conn() as conn:
        async with conn.cursor() as cur:
            # Wrapped in a list so the jsonb `||` appends an array element.
            log_entry = json.dumps([step_obj], ensure_ascii=False)
            await cur.execute(append_sql, (log_entry, ingested_delta, run_id))
        await conn.commit()
async def sa_finish(run_id: int, status: str = "done", error: str | None = None) -> None:
    """Close a Super Agent run: store *status*/*error*, stamp ``finished_at``.

    ``finished_at`` is set to the database's ``now()``.
    """
    schema = settings.db_schema
    close_sql = f"""UPDATE {schema}.super_agent_runs
                    SET status=%s, error=%s, finished_at=now() WHERE id=%s"""
    params = (status, error, run_id)
    async with get_conn() as conn:
        async with conn.cursor() as cur:
            await cur.execute(close_sql, params)
        await conn.commit()
async def sa_request_stop(run_id: int) -> None:
    """Set the ``stop_requested`` flag to TRUE on the given run."""
    schema = settings.db_schema
    flag_sql = f"UPDATE {schema}.super_agent_runs SET stop_requested=TRUE WHERE id=%s"
    async with get_conn() as conn:
        async with conn.cursor() as cur:
            await cur.execute(flag_sql, (run_id,))
        await conn.commit()
async def sa_stop_requested(run_id: int) -> bool:
    """Tell whether the run's ``stop_requested`` flag is set.

    Returns ``False`` when the run does not exist or the flag is falsy.
    """
    schema = settings.db_schema
    query = f"SELECT stop_requested FROM {schema}.super_agent_runs WHERE id=%s"
    async with get_conn() as conn, conn.cursor() as cur:
        await cur.execute(query, (run_id,))
        row = await cur.fetchone()
    if not row:
        return False
    return bool(row["stop_requested"])
# ── Super Agent work-order ledger + escalation helpers ───────────────────────
async def sa_add_task(run_id: int, step: int, category: str | None,
                      action: str, plan_reason: str | None, tool: str | None,
                      result: dict | None = None, status: str = "done",
                      note: str | None = None,
                      related_task_id: int | None = None) -> int:
    """Append one row to the ``super_agent_tasks`` work-order ledger.

    The ``fetched`` / ``approved`` / ``pending`` / ``skipped`` counters are
    read from *result*; a missing key (or a missing/empty *result*) is
    stored as 0. Returns the id of the inserted row.
    """
    schema = settings.db_schema
    counts = result or {}
    insert_sql = f"""INSERT INTO {schema}.super_agent_tasks
                     (run_id, step, category, action, plan_reason, tool,
                     fetched, approved, pending, skipped, status, note,
                     related_task_id)
                     VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
                     RETURNING id"""
    params = (
        run_id, step, category, action, plan_reason, tool,
        counts.get("fetched", 0),
        counts.get("approved", 0),
        counts.get("pending", 0),
        counts.get("skipped", 0),
        status, note, related_task_id,
    )
    async with get_conn() as conn:
        async with conn.cursor() as cur:
            await cur.execute(insert_sql, params)
            created = await cur.fetchone()
            task_id = created["id"]
        await conn.commit()
    return task_id
async def sa_list_tasks(run_id: int | None = None, limit: int = 200) -> list[dict]:
    """List work-order ledger rows, highest id first.

    With a truthy *run_id* only that run's rows are returned; otherwise rows
    from every run. At most *limit* rows come back.
    """
    schema = settings.db_schema
    if run_id:
        # NOTE(review): truthiness test, so run_id=0 behaves like None.
        query = f"""SELECT * FROM {schema}.super_agent_tasks
                    WHERE run_id=%s ORDER BY id DESC LIMIT %s"""
        params = (run_id, limit)
    else:
        query = f"""SELECT * FROM {schema}.super_agent_tasks
                    ORDER BY id DESC LIMIT %s"""
        params = (limit,)
    async with get_conn() as conn, conn.cursor() as cur:
        await cur.execute(query, params)
        rows = await cur.fetchall()
    return rows
async def sa_set_status(run_id: int, status: str) -> None:
    """Change a run's ``status`` without touching ``finished_at``.

    Unlike ``sa_finish`` this leaves the run un-finished; the original note
    says it is used for stewarding patrols and resuming.
    """
    schema = settings.db_schema
    update_sql = f"UPDATE {schema}.super_agent_runs SET status=%s WHERE id=%s"
    async with get_conn() as conn:
        async with conn.cursor() as cur:
            await cur.execute(update_sql, (status, run_id))
        await conn.commit()
async def _sa_reap_orphans() -> None:
    """Mark Super Agent runs orphaned by a previous process as stopped.

    Runs still in status ``'running'`` or ``'stewarding'`` at startup lost
    their background task with the old process; they are set to
    ``'stopped'`` with ``finished_at=now()`` so the UI does not keep polling
    a dead run.

    Best-effort: any failure is logged and swallowed so that it cannot abort
    pool initialisation, which is where this is called from.
    """
    # Kept local so this block does not depend on a module-level import.
    import logging

    s = settings.db_schema
    try:
        async with get_conn() as conn:
            async with conn.cursor() as cur:
                await cur.execute(
                    f"""UPDATE {s}.super_agent_runs
                        SET status='stopped', finished_at=now()
                        WHERE status IN ('running','stewarding')""")
            await conn.commit()
    except Exception:
        # Still non-fatal, but no longer silent: leave a trace for operators.
        logging.getLogger(__name__).warning(
            "failed to reap orphaned super_agent_runs", exc_info=True)
async def sa_has_open_escalation(category: str) -> bool:
    """Check for an un-closed Super Agent escalation task for *category*.

    True when ``acquisition_tasks`` holds a row created by ``'super_agent'``
    whose ``scenario_tags`` contains the JSON array ``[category]`` and whose
    status is none of ``completed`` / ``cancelled`` / ``review_approved``.
    """
    schema = settings.db_schema
    probe_sql = f"""SELECT 1 FROM {schema}.acquisition_tasks
                    WHERE created_by='super_agent'
                    AND scenario_tags @> %s::jsonb
                    AND status NOT IN ('completed','cancelled','review_approved')
                    LIMIT 1"""
    async with get_conn() as conn, conn.cursor() as cur:
        tag_filter = json.dumps([category])
        await cur.execute(probe_sql, (tag_filter,))
        hit = await cur.fetchone()
    return hit is not None
async def get_admin_user_id() -> int | None:
    """Return the id of the user that escalations should target.

    Picks the lowest-id active user holding the ``'admin'`` role. If there is
    none, falls back to the user whose username equals
    ``settings.auth_default_username``. Returns ``None`` when neither exists.
    """
    schema = settings.db_schema
    admin_sql = f"""SELECT u.id FROM {schema}.users u
                    JOIN {schema}.user_roles ur ON ur.user_id = u.id
                    WHERE ur.role_key='admin' AND u.status='active'
                    ORDER BY u.id LIMIT 1"""
    fallback_sql = f"SELECT id FROM {schema}.users WHERE username=%s"
    async with get_conn() as conn, conn.cursor() as cur:
        await cur.execute(admin_sql)
        found = await cur.fetchone()
        if not found:
            # No active admin: fall back to the configured default account.
            await cur.execute(fallback_sql, (settings.auth_default_username,))
            found = await cur.fetchone()
    if found:
        return found["id"]
    return None
# ── 高德全城网格:分页记忆 + 枯竭持久化(跨轮不重复烧额度)──────────────────
async def grid_seed(cat: str, typecode: str,
                    cells: list[tuple]) -> int:
    """Bulk-seed depth-0 grid cells for *cat*, skipping ones that exist.

    Each item of *cells* is unpacked as ``(min_lng, min_lat, max_lng,
    max_lat)``. Rows that conflict on ``(cat, min_lng, min_lat, max_lng,
    max_lat)`` are left untouched. Returns the summed cursor row counts,
    i.e. the number of rows actually inserted.
    """
    schema = settings.db_schema
    seed_sql = f"""INSERT INTO {schema}.gaode_grid_cells
                   (cat,typecode,min_lng,min_lat,max_lng,max_lat,depth)
                   VALUES (%s,%s,%s,%s,%s,%s,0)
                   ON CONFLICT (cat,min_lng,min_lat,max_lng,max_lat)
                   DO NOTHING"""
    inserted = 0
    async with get_conn() as conn:
        async with conn.cursor() as cur:
            for min_lng, min_lat, max_lng, max_lat in cells:
                await cur.execute(
                    seed_sql,
                    (cat, typecode, min_lng, min_lat, max_lng, max_lat),
                )
                inserted += cur.rowcount or 0
        await conn.commit()
    return inserted
async def grid_counts() -> dict:
    """Per-``cat`` grid totals and scanned (``exhausted``) counts.

    Only depth-0 parent cells are counted, so subdivided child cells stay
    invisible to the caller. Returns ``{cat: {"total": ..., "done": ...}}``.
    The original note adds that there are 168 depth-0 cells per cat and that
    a parent is marked exhausted when it is subdivided, so the figure still
    reflects coverage — the count of 168 is not verifiable from this
    function.
    """
    schema = settings.db_schema
    count_sql = f"""SELECT cat,
                    COUNT(*) total,
                    COUNT(*) FILTER (WHERE status='exhausted') done
                    FROM {schema}.gaode_grid_cells
                    WHERE depth=0
                    GROUP BY cat"""
    async with get_conn() as conn, conn.cursor() as cur:
        await cur.execute(count_sql)
        rows = await cur.fetchall()
    summary = {}
    for row in rows:
        summary[row["cat"]] = {"total": row["total"], "done": row["done"]}
    return summary
async def grid_pending_cats() -> list[str]:
    """Return the distinct ``cat`` values that still have a pending cell."""
    schema = settings.db_schema
    query = f"""SELECT DISTINCT cat FROM {schema}.gaode_grid_cells
                WHERE status='pending'"""
    async with get_conn() as conn, conn.cursor() as cur:
        await cur.execute(query)
        rows = await cur.fetchall()
    cats = []
    for row in rows:
        cats.append(row["cat"])
    return cats
async def grid_take_next(cat: str) -> dict | None:
    """Pick one pending grid cell of *cat*, shallowest depth first.

    Ordering by ``depth`` then ``id`` scans coarse cells before finer ones.
    This only reads: the returned row's status is not changed here. Returns
    ``None`` when *cat* has no pending cell.
    """
    schema = settings.db_schema
    query = f"""SELECT * FROM {schema}.gaode_grid_cells
                WHERE cat=%s AND status='pending'
                ORDER BY depth, id LIMIT 1"""
    async with get_conn() as conn, conn.cursor() as cur:
        await cur.execute(query, (cat,))
        cell = await cur.fetchone()
    return cell
async def grid_update(cell_id: int, next_page: int,
                      fetched_delta: int, status: str) -> None:
    """Persist scan progress for one grid cell.

    Overwrites ``next_page`` and ``status``, adds *fetched_delta* to
    ``fetched`` and refreshes ``updated_at``.
    """
    schema = settings.db_schema
    update_sql = f"""UPDATE {schema}.gaode_grid_cells
                     SET next_page=%s, fetched=fetched+%s,
                     status=%s, updated_at=now()
                     WHERE id=%s"""
    params = (next_page, fetched_delta, status, cell_id)
    async with get_conn() as conn:
        async with conn.cursor() as cur:
            await cur.execute(update_sql, params)
        await conn.commit()
async def grid_subdivide(parent_id: int, cat: str, typecode: str,
                         children: list[tuple], depth: int) -> None:
    """Retire a parent cell and insert its child cells for a deeper scan.

    The parent is marked ``exhausted``; every item of *children*, unpacked
    as ``(min_lng, min_lat, max_lng, max_lat)``, is inserted at *depth*
    unless an identical cell already exists. Both steps are committed
    together. The original note says four children are passed so that dense
    areas are not missed.
    """
    schema = settings.db_schema
    retire_sql = (
        f"UPDATE {schema}.gaode_grid_cells SET status='exhausted',"
        f" updated_at=now() WHERE id=%s"
    )
    child_sql = f"""INSERT INTO {schema}.gaode_grid_cells
                    (cat,typecode,min_lng,min_lat,max_lng,max_lat,depth)
                    VALUES (%s,%s,%s,%s,%s,%s,%s)
                    ON CONFLICT (cat,min_lng,min_lat,max_lng,max_lat)
                    DO NOTHING"""
    async with get_conn() as conn:
        async with conn.cursor() as cur:
            await cur.execute(retire_sql, (parent_id,))
            for min_lng, min_lat, max_lng, max_lat in children:
                await cur.execute(
                    child_sql,
                    (cat, typecode, min_lng, min_lat, max_lng, max_lat, depth),
                )
        await conn.commit()
# ── 蒸馏闸门:待审候选 读取/裁决落库 ─────────────────────────────────────────
async def sa_pending_places(limit: int = 12) -> list[dict]:
    """List Place candidates awaiting distillation review.

    Selects ``candidate_entities`` of the default tenant/project with
    ``entity_type='Place'``, ``status='pending_review'`` and
    ``reviewed_by='super_agent'``, oldest id first, at most *limit* rows.
    Each row carries ``id``, ``natural_key``, ``display_name`` and
    ``payload_jsonb``.
    """
    schema = settings.db_schema
    tenant, project = settings.default_tenant, settings.default_project
    query = f"""SELECT id, natural_key, display_name, payload_jsonb
                FROM {schema}.candidate_entities
                WHERE tenant_id=%s AND project_id=%s
                AND entity_type='Place' AND status='pending_review'
                AND reviewed_by='super_agent'
                ORDER BY id LIMIT %s"""
    async with get_conn() as conn, conn.cursor() as cur:
        await cur.execute(query, (tenant, project, limit))
        rows = await cur.fetchall()
    return rows
async def sa_set_candidate_status(cand_id: int, status: str,
                                  reviewer: str = "distill_gate") -> None:
    """Set ``status`` and ``reviewed_by`` on one ``candidate_entities`` row."""
    schema = settings.db_schema
    verdict_sql = f"""UPDATE {schema}.candidate_entities
                      SET status=%s, reviewed_by=%s WHERE id=%s"""
    params = (status, reviewer, cand_id)
    async with get_conn() as conn:
        async with conn.cursor() as cur:
            await cur.execute(verdict_sql, params)
        await conn.commit()
async def sa_record_conflict(natural_key: str, field: str,
                             existing: str, distilled: str,
                             note: str = "") -> int | None:
    """Record a distillation-vs-graph conflict as an open validation issue.

    When the distilled consensus for *field* contradicts the value already
    in the graph, nothing is overwritten: a ``validation_issues`` row with
    ``issue_code='distill_conflict'``, severity ``warning`` and status
    ``open`` is inserted so a human can adjudicate.

    The issue is attached to the candidate matching *natural_key* in the
    default tenant/project, when one exists. If no batch id can be taken
    from that candidate, the tenant's most recent import batch is used.

    Returns the new issue id, or ``None`` when there is no batch to attach
    the issue to (nothing is written in that case).
    """
    s = settings.db_schema
    T, P = settings.default_tenant, settings.default_project
    # Human-readable message; both values are truncated to 300 characters.
    msg = (f"[蒸馏×图谱矛盾] 字段「{field}」:图谱既有「{str(existing)[:300]}」 "
           f"vs 蒸馏共识「{str(distilled)[:300]}」。{note}".strip())
    async with get_conn() as conn:
        async with conn.cursor() as cur:
            await cur.execute(
                f"""SELECT id, batch_id FROM {s}.candidate_entities
                    WHERE tenant_id=%s AND project_id=%s AND natural_key=%s
                    LIMIT 1""", (T, P, natural_key))
            row = await cur.fetchone()
            cid = row["id"] if row else None
            bid = row["batch_id"] if row else None
            if bid is None:  # FK fallback: use the most recent batch
                await cur.execute(
                    f"""SELECT id FROM {s}.import_batches
                        WHERE tenant_id=%s ORDER BY id DESC LIMIT 1""", (T,))
                br = await cur.fetchone()
                if not br:
                    return None  # no batch to attach to; skip safely
                bid = br["id"]
            # Machine-readable copy of the conflict, stored untruncated.
            sug = json.dumps({"source": "distill", "field": field,
                              "existing": existing, "distilled": distilled,
                              "note": note}, ensure_ascii=False)
            await cur.execute(
                f"""INSERT INTO {s}.validation_issues
                    (batch_id, candidate_entity_id, severity, issue_code,
                    message, suggestion_jsonb, status)
                    VALUES (%s, %s, 'warning', 'distill_conflict',
                    %s, %s, 'open')
                    RETURNING id""",
                (bid, cid, msg, sug))
            iid = (await cur.fetchone())["id"]
        await conn.commit()
    return iid
async def sa_record_schema_proposal(column_name: str, field_name: str,
                                    sample_value: str, reason: str,
                                    confidence: float = 0.7) -> int | None:
    """File a pending schema-extension proposal in ``field_proposals``.

    Per the original note this is the schema self-evolution entry point,
    used when an attribute is found that the schema does not have. The
    proposal is stored for entity type ``'Place'`` with value type
    ``'text'`` and *sample_value* as the single sample.

    Deduplicated: when the default tenant/project already has a ``pending``
    proposal for *column_name*, nothing is inserted.

    Returns the new proposal id, or ``None`` when it was skipped as a
    duplicate.
    """
    s = settings.db_schema
    T, P = settings.default_tenant, settings.default_project
    async with get_conn() as conn:
        async with conn.cursor() as cur:
            await cur.execute(
                f"""SELECT id FROM {s}.field_proposals
                    WHERE tenant_id=%s AND project_id=%s
                    AND column_name=%s AND status='pending' LIMIT 1""",
                (T, P, column_name))
            if await cur.fetchone():
                return None
            await cur.execute(
                f"""INSERT INTO {s}.field_proposals
                    (tenant_id, project_id, column_name, sample_values_jsonb,
                    suggested_entity_type, suggested_field_name,
                    suggested_value_type, confidence, reason, status,
                    created_at, updated_at)
                    VALUES (%s,%s,%s,%s,'Place',%s,'text',%s,%s,'pending',
                    now(), now())
                    RETURNING id""",
                (T, P, column_name,
                 json.dumps([sample_value], ensure_ascii=False),
                 field_name, confidence, reason))
            pid = (await cur.fetchone())["id"]
        # Commit explicitly, as the other writers in this module do, rather
        # than relying on what happens when the connection context exits.
        await conn.commit()
    return pid
async def sa_save_evidence(records: list[dict]) -> int:
    """Upsert raw social evidence rows into ``social_evidence``.

    Idempotent on ``(platform, source_id)``: a conflicting row has its
    like/comment/collect/share counts, title and content refreshed and
    ``captured_at`` reset to ``now()``; an already-set
    ``place_natural_key`` is kept and only filled in when it was NULL.
    The original note describes the intent as "capture once, mine many
    times": derived knowledge is recomputed from these rows.

    Records without a truthy ``source_id`` are skipped. Returns the number
    of records sent to the database (0 for an empty input, without opening
    a connection).
    """
    if not records:
        return 0
    schema = settings.db_schema
    upsert_sql = f"""INSERT INTO {schema}.social_evidence
                     (platform, kind, source_id, url, entity_name,
                     place_natural_key, keyword, title, content, author,
                     author_id, author_avatar, likes, comments, collects,
                     shares, publish_time, location, tags, image_urls,
                     raw_jsonb)
                     VALUES (%(platform)s,%(kind)s,%(source_id)s,%(url)s,
                     %(entity_name)s,%(place_natural_key)s,%(keyword)s,
                     %(title)s,%(content)s,%(author)s,%(author_id)s,
                     %(author_avatar)s,%(likes)s,%(comments)s,%(collects)s,
                     %(shares)s,%(publish_time)s,%(location)s,
                     %(tags)s,%(image_urls)s,%(raw_jsonb)s)
                     ON CONFLICT (platform, source_id) DO UPDATE SET
                     likes=EXCLUDED.likes, comments=EXCLUDED.comments,
                     collects=EXCLUDED.collects, shares=EXCLUDED.shares,
                     title=EXCLUDED.title, content=EXCLUDED.content,
                     place_natural_key=COALESCE(
                     {schema}.social_evidence.place_natural_key,
                     EXCLUDED.place_natural_key),
                     captured_at=now()"""
    # Columns copied straight from the record (None when absent).
    passthrough = (
        "url", "entity_name", "place_natural_key", "keyword", "title",
        "content", "author", "author_id", "author_avatar",
        "publish_time", "location",
    )
    counters = ("likes", "comments", "collects", "shares")
    saved = 0
    async with get_conn() as conn:
        async with conn.cursor() as cur:
            for rec in records:
                if not rec.get("source_id"):
                    continue
                params = {
                    "platform": rec.get("platform", "xhs"),
                    "kind": rec.get("kind", "note"),
                    "source_id": str(rec["source_id"]),
                }
                for key in passthrough:
                    params[key] = rec.get(key)
                for key in counters:
                    # Missing / None / empty counts are stored as 0.
                    params[key] = int(rec.get(key) or 0)
                params["tags"] = json.dumps(
                    rec.get("tags") or [], ensure_ascii=False)
                params["image_urls"] = json.dumps(
                    rec.get("image_urls") or [], ensure_ascii=False)
                params["raw_jsonb"] = json.dumps(
                    rec.get("raw") or {}, ensure_ascii=False)
                await cur.execute(upsert_sql, params)
                saved += 1
        await conn.commit()
    return saved
async def sa_evidence_for(place_natural_key: str,
                          limit: int = 200) -> list[dict]:
    """Fetch stored social evidence (text and time) for one entity.

    Rows of kind ``'comment'`` sort first, then by ``likes`` descending with
    NULLs last; at most *limit* rows are returned. The original note says
    the result feeds event extraction and sentiment mining.
    """
    schema = settings.db_schema
    query = f"""SELECT kind, title, content, publish_time, likes,
                comments, source_id, platform
                FROM {schema}.social_evidence
                WHERE place_natural_key=%s
                ORDER BY (kind='comment') DESC, likes DESC NULLS LAST
                LIMIT %s"""
    async with get_conn() as conn, conn.cursor() as cur:
        await cur.execute(query, (place_natural_key, limit))
        rows = await cur.fetchall()
    return rows
async def sa_merge_candidate_payload(natural_key: str, fields: dict) -> None:
    """Merge distilled consensus fields into a candidate's payload.

    *fields* plus an ``"_enriched": True`` marker are concatenated onto
    ``payload_jsonb`` (a NULL payload is treated as ``{}``) for the
    candidate(s) matching *natural_key* in the default tenant/project.
    Does nothing when *fields* is empty.
    """
    if not fields:
        return
    schema = settings.db_schema
    tenant, project = settings.default_tenant, settings.default_project
    patch = dict(fields)
    patch["_enriched"] = True
    merge_sql = f"""UPDATE {schema}.candidate_entities
                    SET payload_jsonb = COALESCE(payload_jsonb,'{{}}'::jsonb)
                    || %s::jsonb
                    WHERE tenant_id=%s AND project_id=%s
                    AND natural_key=%s"""
    async with get_conn() as conn:
        async with conn.cursor() as cur:
            patch_json = json.dumps(patch, ensure_ascii=False)
            await cur.execute(
                merge_sql, (patch_json, tenant, project, natural_key))
        await conn.commit()