Polish project documentation and runtime config

This commit is contained in:
2026-06-09 10:22:59 +08:00
parent 5f061295d8
commit 0594fc9f8c
43 changed files with 1001 additions and 97 deletions

View File

@@ -13,7 +13,7 @@ import sys
from pathlib import Path
from typing import Any
ROOT = Path("/Users/xuexue/new2")
ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))

View File

@@ -1,6 +1,7 @@
import http from "node:http";
import { spawn } from "node:child_process";
import { readFileSync } from "node:fs";
import path from "node:path";
function readEnvKey(path, key) {
const txt = readFileSync(path, "utf8");
@@ -62,8 +63,10 @@ async function wait(ms) {
}
async function main() {
const key = readEnvKey("/Users/xuexue/new2/.env", "AMAP_JS_KEY");
const security = readEnvKey("/Users/xuexue/new2/.env", "AMAP_SECURITY_JSCODE");
const root = path.resolve(new URL("..", import.meta.url).pathname, "..");
const envPath = process.env.TRAVEL_KG_ENV_PATH || path.join(root, ".env");
const key = readEnvKey(envPath, "AMAP_JS_KEY");
const security = readEnvKey(envPath, "AMAP_SECURITY_JSCODE");
if (!key || !security) throw new Error("missing AMap JS key/security");
const chrome = spawn(

View File

@@ -16,10 +16,11 @@ from falkordb import FalkorDB
from psycopg.rows import dict_row
from psycopg.types.json import Jsonb
from common_paths import PROJECT_ROOT, TRAVEL_AGENCY_SOURCE_ROOT, TRAVEL_KG_EXPORT_ROOT
SOURCE_DIR = Path("/Users/xuexue/Downloads/旅行社业务")
OUT_DIR = Path("/Users/xuexue/Downloads/图谱数据/旅行社项目入库")
SCHEMA_DIR = Path("/Users/xuexue/new2/schema搭建/travel_agency_business")
SOURCE_DIR = TRAVEL_AGENCY_SOURCE_ROOT
OUT_DIR = TRAVEL_KG_EXPORT_ROOT / "旅行社项目入库"
SCHEMA_DIR = PROJECT_ROOT / "schema搭建/travel_agency_business"
DB_URL = "postgresql://admin:password@localhost:5433/kg_admin"
DB_SCHEMA = "kg_admin_new2"
TENANT_ID = "travel_agency"
@@ -1829,7 +1830,7 @@ def write_outputs(builder: KGBuilder, schema: dict[str, Any], qa: list[dict[str,
f"生成时间:{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
"",
"## 数据来源",
"- `/Users/xuexue/Downloads/旅行社业务/2026年新行程打包`:既有线路产品、每日行程、费用包含/不含、自费项、风险提示。",
"- `TRAVEL_AGENCY_SOURCE_ROOT/2026年新行程打包`:既有线路产品、每日行程、费用包含/不含、自费项、风险提示。",
"- `滨海国旅2-8人拼小团计划...xlsx`2-8人拼小团团期、房型、成人/儿童/单房差、景区小交通、证件退费政策。",
"- `20-25人独立成团.xlsx`独立成团产品、季节价、20/25人报价、泰语导游和2+1大巴服务。",
"- `住宿资源库(四钻及以上).xlsx`、`餐厅资源库.xlsx`:酒店/餐厅资源、区域、价格、适用场景。",

View File

@@ -18,15 +18,16 @@ from falkordb import FalkorDB
from psycopg.rows import dict_row
from psycopg.types.json import Jsonb
from common_paths import PROJECT_ROOT, TRAVEL_AGENCY_SOURCE_ROOT, TRAVEL_KG_EXPORT_ROOT
SOURCE_ROOT = Path("/Users/xuexue/Downloads/旅行社业务")
SOURCE_ROOT = TRAVEL_AGENCY_SOURCE_ROOT
ROUTE_SOURCE_DIR = SOURCE_ROOT / "2026年新行程打包"
ROUTE_MD_DIR = SOURCE_ROOT / "2026年新行程打包_md整理"
ROUTE_MD_PRODUCTS = ROUTE_MD_DIR / "products"
LEGACY_SCRIPT = Path("/Users/xuexue/new2/scripts/build_travel_graph_existing_product_project.py")
LEGACY_SCRIPT = PROJECT_ROOT / "scripts/build_travel_graph_existing_product_project.py"
SCHEMA_OUT_DIR = Path("/Users/xuexue/new2/schema搭建/travel_fixed_route_item")
OUT_DIR = Path("/Users/xuexue/Downloads/图谱数据/travel_fixed_route_item_旅行社固定线路资源图谱")
SCHEMA_OUT_DIR = PROJECT_ROOT / "schema搭建/travel_fixed_route_item"
OUT_DIR = TRAVEL_KG_EXPORT_ROOT / "travel_fixed_route_item_旅行社固定线路资源图谱"
DB_URL = "postgresql://admin:password@localhost:5433/kg_admin"
DB_SCHEMA = "kg_admin_new2"

View File

@@ -17,13 +17,14 @@ from falkordb import FalkorDB
from psycopg.rows import dict_row
from psycopg.types.json import Jsonb
from common_paths import PROJECT_ROOT, TRAVEL_AGENCY_SOURCE_ROOT, TRAVEL_KG_EXPORT_ROOT
SOURCE_ROOT = Path("/Users/xuexue/Downloads/旅行社业务")
SOURCE_ROOT = TRAVEL_AGENCY_SOURCE_ROOT
ROUTE_MD_DIR = SOURCE_ROOT / "2026年新行程打包_md整理"
ROUTE_MD_PRODUCTS = ROUTE_MD_DIR / "products"
SCHEMA_SRC = Path("/Users/xuexue/new2/schema搭建/travel_agency_business/travel_agency_existing_product_schema.v1.json")
SCHEMA_OUT_DIR = Path("/Users/xuexue/new2/schema搭建/travel_graph_existing_product")
OUT_DIR = Path("/Users/xuexue/Downloads/图谱数据/travel_graph_旅行社线路制定")
SCHEMA_SRC = PROJECT_ROOT / "schema搭建/travel_agency_business/travel_agency_existing_product_schema.v1.json"
SCHEMA_OUT_DIR = PROJECT_ROOT / "schema搭建/travel_graph_existing_product"
OUT_DIR = TRAVEL_KG_EXPORT_ROOT / "travel_graph_旅行社线路制定"
AMAP_CACHE_PATH = OUT_DIR / "amap_poi_enrichment_cache.json"
AMAP_DRIVING_CACHE_PATH = OUT_DIR / "amap_driving_distance_cache.json"

View File

@@ -5,7 +5,7 @@ from __future__ import annotations
import sys
from pathlib import Path
ROOT = Path("/Users/xuexue/new2")
ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))

17
scripts/common_paths.py Normal file
View File

@@ -0,0 +1,17 @@
from __future__ import annotations
import os
from pathlib import Path
PROJECT_ROOT = Path(__file__).resolve().parents[1]
DATA_ROOT = Path(os.getenv("TRAVEL_KG_DATA_ROOT", PROJECT_ROOT / "data")).expanduser()
TRAVEL_AGENCY_SOURCE_ROOT = Path(
os.getenv("TRAVEL_AGENCY_SOURCE_ROOT", DATA_ROOT / "source" / "travel_agency")
).expanduser()
TRAVEL_DELIVERY_ROOT = Path(
os.getenv("TRAVEL_DELIVERY_ROOT", DATA_ROOT / "source" / "travel_delivery_20260602")
).expanduser()
TRAVEL_KG_EXPORT_ROOT = Path(os.getenv("TRAVEL_KG_EXPORT_ROOT", DATA_ROOT / "exports")).expanduser()
GAODE_CRAWLER_PATH = Path(os.getenv("GAODE_CRAWLER_PATH", PROJECT_ROOT / "scripts" / "crawl_guiyan.py")).expanduser()
ENV_PATH = Path(os.getenv("TRAVEL_KG_ENV_PATH", PROJECT_ROOT / ".env")).expanduser()

View File

@@ -21,10 +21,12 @@ from typing import Any
import requests
import urllib3
from common_paths import GAODE_CRAWLER_PATH, PROJECT_ROOT, TRAVEL_KG_EXPORT_ROOT
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
BUILD_SCRIPT = Path("/Users/xuexue/new2/scripts/build_travel_graph_existing_product_project.py")
OUT_DIR = Path("/Users/xuexue/Downloads/图谱数据/travel_graph_旅行社线路制定")
BUILD_SCRIPT = PROJECT_ROOT / "scripts/build_travel_graph_existing_product_project.py"
OUT_DIR = TRAVEL_KG_EXPORT_ROOT / "travel_graph_旅行社线路制定"
NODES_PATH = OUT_DIR / "抽取结果_nodes.json"
CACHE_PATH = OUT_DIR / "amap_driving_distance_cache.json"
REPORT_CSV = OUT_DIR / "amap_driving_distance_report.csv"
@@ -48,7 +50,7 @@ def load_key() -> str:
for key in (os.environ.get("AMAP_WEB_KEY"), os.environ.get("AMAP_KEY")):
if key:
return key
crawl_path = Path("/Users/xuexue/PycharmProjects/PythonProject/xuexue-CityGraph/crawl_guiyan.py")
crawl_path = GAODE_CRAWLER_PATH
if crawl_path.exists():
spec = importlib.util.spec_from_file_location("crawl_guiyan", crawl_path)
mod = importlib.util.module_from_spec(spec)

View File

@@ -21,10 +21,12 @@ from typing import Any
import requests
import urllib3
from common_paths import GAODE_CRAWLER_PATH, PROJECT_ROOT, TRAVEL_KG_EXPORT_ROOT
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
BUILD_SCRIPT = Path("/Users/xuexue/new2/scripts/build_travel_graph_existing_product_project.py")
OUT_DIR = Path("/Users/xuexue/Downloads/图谱数据/travel_graph_旅行社线路制定")
BUILD_SCRIPT = PROJECT_ROOT / "scripts/build_travel_graph_existing_product_project.py"
OUT_DIR = TRAVEL_KG_EXPORT_ROOT / "travel_graph_旅行社线路制定"
NODES_PATH = OUT_DIR / "抽取结果_nodes.json"
CACHE_PATH = OUT_DIR / "amap_poi_enrichment_cache.json"
REPORT_CSV = OUT_DIR / "amap_poi_enrichment_report.csv"
@@ -47,7 +49,7 @@ def load_key() -> str:
for key in (os.environ.get("AMAP_WEB_KEY"), os.environ.get("AMAP_KEY")):
if key:
return key
crawl_path = Path("/Users/xuexue/PycharmProjects/PythonProject/xuexue-CityGraph/crawl_guiyan.py")
crawl_path = GAODE_CRAWLER_PATH
if crawl_path.exists():
spec = importlib.util.spec_from_file_location("crawl_guiyan", crawl_path)
mod = importlib.util.module_from_spec(spec)

View File

@@ -18,10 +18,10 @@ import urllib.request
from pathlib import Path
from typing import Any
from common_paths import ENV_PATH, TRAVEL_DELIVERY_ROOT
BASE_DIR = Path("/Users/xuexue/Documents/trae_projects/travel- graph/delivery_20260602")
BASE_DIR = TRAVEL_DELIVERY_ROOT
OUT_DIR = BASE_DIR / "amap_enriched"
ENV_PATH = Path("/Users/xuexue/Desktop/zn-kg/.env")
CACHE_PATH = OUT_DIR / "_amap_cache.json"
SCENIC_TYPES = "110000"

View File

@@ -4,7 +4,8 @@ import path from "node:path";
import http from "node:http";
import { spawn } from "node:child_process";
const BASE_DIR = "/Users/xuexue/Documents/trae_projects/travel- graph/delivery_20260602";
const ROOT = path.resolve(new URL("..", import.meta.url).pathname, "..");
const BASE_DIR = process.env.TRAVEL_DELIVERY_ROOT || path.join(ROOT, "data", "source", "travel_delivery_20260602");
const OUT_DIR = path.join(BASE_DIR, "amap_js_enriched");
const CACHE_FILE = path.join(OUT_DIR, "_amap_js_cache.json");
const CHROME = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome";
@@ -185,8 +186,9 @@ function jsString(v) {
}
async function initAmap(cdp) {
const key = readEnvKey("/Users/xuexue/new2/.env", "AMAP_JS_KEY");
const security = readEnvKey("/Users/xuexue/new2/.env", "AMAP_SECURITY_JSCODE");
const envPath = process.env.TRAVEL_KG_ENV_PATH || path.join(ROOT, ".env");
const key = readEnvKey(envPath, "AMAP_JS_KEY");
const security = readEnvKey(envPath, "AMAP_SECURITY_JSCODE");
if (!key || !security) throw new Error("missing AMap JS key/security");
const expr = `
(async () => {

View File

@@ -12,9 +12,11 @@ from typing import Any
from falkordb import FalkorDB
from common_paths import TRAVEL_KG_EXPORT_ROOT
GRAPH_NAME = "travel_agency_2_0_test"
OUT_ROOT = Path("/Users/xuexue/Downloads/图谱数据/travel_agency_2_0_test_旅行社2.0测试")
OUT_ROOT = TRAVEL_KG_EXPORT_ROOT / "travel_agency_2_0_test_旅行社2.0测试"
SCHEMA_SIMPLE = OUT_ROOT / "tencent_adp_schema.simple.json"
FILTERED_DIR = OUT_ROOT / "filtered_import_from_travel_fixed_route_item"
POI_DIR = OUT_ROOT / "poi_nearby_import_without_amap"

View File

@@ -13,7 +13,7 @@ import sys
from pathlib import Path
from typing import Any
ROOT = Path("/Users/xuexue/new2")
ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))

View File

@@ -5,9 +5,11 @@ from datetime import datetime
from falkordb import FalkorDB
from common_paths import TRAVEL_AGENCY_SOURCE_ROOT
GRAPH_NAME = "travel_agency_2_0_test"
SOURCE_FILE = "/Users/xuexue/Downloads/旅行社业务/线上客资回复话术.docx"
SOURCE_FILE = str(TRAVEL_AGENCY_SOURCE_ROOT / "线上客资回复话术.docx")
UPDATED_AT = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

View File

@@ -14,6 +14,8 @@ from falkordb import FalkorDB
from psycopg.rows import dict_row
from psycopg.types.json import Jsonb
from common_paths import TRAVEL_DELIVERY_ROOT, TRAVEL_KG_EXPORT_ROOT
DB_URL = "postgresql://admin:password@localhost:5433/kg_admin"
DB_SCHEMA = "kg_admin_new2"
@@ -22,10 +24,10 @@ PROJECT_ID = "travel_agency_2_0_test"
GRAPH_NAME = "travel_agency_2_0_test"
TEMPLATE_ID = "travel_agency_2_0_poi_nearby_import_without_amap_v1"
SOURCE_DIR = Path("/Users/xuexue/Documents/trae_projects/travel- graph/delivery_20260602")
SOURCE_DIR = TRAVEL_DELIVERY_ROOT
HOTEL_FILE = SOURCE_DIR / "hotel_poi.csv"
RESTAURANT_FILE = SOURCE_DIR / "restaurant_poi.csv"
OUT_DIR = Path("/Users/xuexue/Downloads/图谱数据/travel_agency_2_0_test_旅行社2.0测试/poi_nearby_import_without_amap")
OUT_DIR = TRAVEL_KG_EXPORT_ROOT / "travel_agency_2_0_test_旅行社2.0测试/poi_nearby_import_without_amap"
RUN_UPDATED_AT = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

View File

@@ -13,7 +13,7 @@ from copy import deepcopy
from pathlib import Path
from typing import Any
ROOT = Path("/Users/xuexue/new2")
ROOT = Path(__file__).resolve().parents[1]
IN_JSON = ROOT / "docs/reports/huaxi_kg_extraction_comparison.json"
SCHEMA_JSON = ROOT / "app/schemas/kg_extraction_v1.schema.json"
OUT_JSON = ROOT / "docs/reports/huaxi_kg_schema_v1_ready.json"
@@ -217,7 +217,7 @@ def write_review_plan(raw: dict[str, Any], payload: dict[str, Any], validation:
"-> final_score < 0.8 或模型冲突:进入人工审核",
"```",
"",
"对应严格 JSON 输出:`/Users/xuexue/new2/docs/reports/huaxi_kg_schema_v1_ready.json`",
"对应严格 JSON 输出:`docs/reports/huaxi_kg_schema_v1_ready.json`",
]
OUT_REVIEW.write_text("\n".join(lines), encoding="utf-8")

View File

@@ -14,6 +14,8 @@ from falkordb import FalkorDB
from psycopg.rows import dict_row
from psycopg.types.json import Jsonb
from common_paths import TRAVEL_KG_EXPORT_ROOT
DB_URL = "postgresql://admin:password@localhost:5433/kg_admin"
DB_SCHEMA = "kg_admin_new2"
@@ -25,7 +27,7 @@ TARGET_PROJECT = "travel_agency_2_0_test"
TARGET_GRAPH = "travel_agency_2_0_test"
TARGET_TEMPLATE_ID = "travel_agency_2_0_fixed_route_core_import_v1"
OUT_DIR = Path("/Users/xuexue/Downloads/图谱数据/travel_agency_2_0_test_旅行社2.0测试/filtered_import_from_travel_fixed_route_item")
OUT_DIR = TRAVEL_KG_EXPORT_ROOT / "travel_agency_2_0_test_旅行社2.0测试/filtered_import_from_travel_fixed_route_item"
RUN_UPDATED_AT = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
EXCLUDED_ENTITY_TYPES = {

View File

@@ -9,10 +9,11 @@ from datetime import datetime
from pathlib import Path
from typing import Any
from common_paths import TRAVEL_AGENCY_SOURCE_ROOT, TRAVEL_KG_EXPORT_ROOT
SOURCE_DIR = Path("/Users/xuexue/Downloads/旅行社业务/2026年新行程打包")
OUT_DIR = Path("/Users/xuexue/Downloads/旅行社业务/2026年新行程打包_md整理")
GRAPH_OUT_DIR = Path("/Users/xuexue/Downloads/图谱数据/旅行社项目入库/已有路线产品Markdown")
SOURCE_DIR = TRAVEL_AGENCY_SOURCE_ROOT / "2026年新行程打包"
OUT_DIR = TRAVEL_AGENCY_SOURCE_ROOT / "2026年新行程打包_md整理"
GRAPH_OUT_DIR = TRAVEL_KG_EXPORT_ROOT / "旅行社项目入库/已有路线产品Markdown"
ATTRACTION_ALIASES = {

View File

@@ -9,7 +9,7 @@ import sys
from pathlib import Path
from typing import Any
ROOT = Path("/Users/xuexue/new2")
ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))

View File

@@ -10,13 +10,14 @@ from psycopg.rows import dict_row
from psycopg.types.json import Jsonb
from app.config import settings
from common_paths import PROJECT_ROOT, TRAVEL_KG_EXPORT_ROOT
PROJECT_ID = "travel_agency_2_0_test"
TENANT_ID = "travel_agency"
GRAPH_NAME = "travel_agency_2_0_test"
NAMESPACE = "travel_agency_2_0"
SCHEMA_DIR = Path("/Users/xuexue/new2/schema搭建/travel_agency_2_0_test")
DOWNLOAD_DIR = Path("/Users/xuexue/Downloads/图谱数据/travel_agency_2_0_test_旅行社2.0测试")
SCHEMA_DIR = PROJECT_ROOT / "schema搭建/travel_agency_2_0_test"
DOWNLOAD_DIR = TRAVEL_KG_EXPORT_ROOT / "travel_agency_2_0_test_旅行社2.0测试"
CURRENT_JSON = SCHEMA_DIR / "travel_agency_2_0_schema.current.json"

View File

@@ -10,13 +10,14 @@ from psycopg.rows import dict_row
from psycopg.types.json import Jsonb
from app.config import settings
from common_paths import PROJECT_ROOT, TRAVEL_KG_EXPORT_ROOT
PROJECT_ID = "travel_agency_2_0_test"
TENANT_ID = "travel_agency"
GRAPH_NAME = "travel_agency_2_0_test"
NAMESPACE = "travel_agency_2_0"
SCHEMA_DIR = Path("/Users/xuexue/new2/schema搭建/travel_agency_2_0_test")
DOWNLOAD_DIR = Path("/Users/xuexue/Downloads/图谱数据/travel_agency_2_0_test_旅行社2.0测试")
SCHEMA_DIR = PROJECT_ROOT / "schema搭建/travel_agency_2_0_test"
DOWNLOAD_DIR = TRAVEL_KG_EXPORT_ROOT / "travel_agency_2_0_test_旅行社2.0测试"
CURRENT_JSON = SCHEMA_DIR / "travel_agency_2_0_schema.current.json"

View File

@@ -10,14 +10,15 @@ from psycopg.rows import dict_row
from psycopg.types.json import Jsonb
from app.config import settings
from common_paths import PROJECT_ROOT, TRAVEL_KG_EXPORT_ROOT
PROJECT_ID = "travel_agency_2_0_test"
TENANT_ID = "travel_agency"
GRAPH_NAME = "travel_agency_2_0_test"
NAMESPACE = "travel_agency_2_0"
SCHEMA_DIR = Path("/Users/xuexue/new2/schema搭建/travel_agency_2_0_test")
DOWNLOAD_DIR = Path("/Users/xuexue/Downloads/图谱数据/travel_agency_2_0_test_旅行社2.0测试")
SCHEMA_DIR = PROJECT_ROOT / "schema搭建/travel_agency_2_0_test"
DOWNLOAD_DIR = TRAVEL_KG_EXPORT_ROOT / "travel_agency_2_0_test_旅行社2.0测试"
CURRENT_JSON = SCHEMA_DIR / "travel_agency_2_0_schema.current.json"