stage-2-v3-configurable-v3

This commit is contained in:
nq
2026-04-16 04:54:42 -07:00
parent 85a242b84b
commit 929d392404

View File

@@ -290,6 +290,34 @@ def infer_style_and_name(it: Landmark, locale: str) -> dict[str, Any]:
"display": style,
}
def inferred_postprocess_tags(it: Landmark) -> dict[str, str]:
    """Suggest default ``postprocess.add_tags`` for a landmark.

    The landmark's semantic class (as reported by ``infer_semantic_class``)
    is mapped to a single ``{tag_key: semantic_class}`` pair. Classes that
    are not in the table yield an empty dict, i.e. no tags are suggested.

    A new dict is built on every call, so callers may mutate the result.
    """
    # (semantic class, tag key) pairs; the tag value is always the class name.
    tag_key_for_class = (
        ("drinking_water", "amenity"),
        ("spring", "natural"),
        ("peak", "natural"),
        ("village", "place"),
        ("town", "place"),
        ("city", "place"),
        ("locality", "place"),
    )
    sem_class = infer_semantic_class(it)
    for known_class, tag_key in tag_key_for_class:
        if sem_class == known_class:
            return {tag_key: known_class}
    return {}
def render_template_value(template: Any, *, base_name: str, hash_value: str, existing_name: str, group_id: str) -> str:
    """Expand a tag-value template and sanitize the result.

    ``template`` is coerced to ``str`` (a falsy template becomes the empty
    string) and expanded with ``str.format``. The placeholders available to
    the template are ``{base_name}``, ``{hash}``, ``{existing_name}`` and
    ``{group_id}``. The expanded text is passed through ``sanitize_text``
    before being returned.

    NOTE: ``str.format`` raises on placeholders outside this set and on
    unbalanced braces; literal braces in a template must be doubled.
    """
    pattern = str(template or '')
    placeholders = {
        "base_name": base_name,
        "hash": hash_value,
        "existing_name": existing_name,
        "group_id": group_id,
    }
    expanded = pattern.format(**placeholders)
    return sanitize_text(expanded)
def spread_examples(items: list[Landmark], n: int) -> list[dict[str, Any]]:
ordered = sorted(items, key=lambda it: (it.mapset, round(it.lon,6), round(it.lat,6), it.name))
if not ordered or n <= 0:
@@ -394,6 +422,10 @@ def analyze_to_config(items: list[Landmark], example_count: int, locale_hint: Op
"preserve_existing_names": True,
"append_hash_when_named": False,
},
"postprocess": {
"add_tags": inferred_postprocess_tags(first),
"remove_tags": [],
},
"hash": dict(DEFAULT_HASH),
"stats": {
"count": len(members),
@@ -443,8 +475,7 @@ def load_json(path: Path) -> Any:
return json.loads(path.read_text(encoding="utf-8"))
def migrate_config(config: dict[str, Any]) -> dict[str, Any]:
"""Normalize older autoconfigs that accidentally promoted sparse semantic
tags into required match constraints for raw symbol groups."""
"""Normalize older configs and ensure postprocess hooks exist."""
cfg = json.loads(json.dumps(config, ensure_ascii=False))
for g in cfg.get("groups") or []:
match = g.setdefault("match", {})
@@ -465,6 +496,22 @@ def migrate_config(config: dict[str, Any]) -> dict[str, Any]:
keep_req.append(kv)
match["semantic_required_any"] = keep_req
match["semantic_preferred_any"] = preferred
pp = g.setdefault("postprocess", {})
add_tags = pp.get("add_tags")
if not isinstance(add_tags, dict):
pp["add_tags"] = {}
remove_tags = pp.get("remove_tags")
if not isinstance(remove_tags, list):
pp["remove_tags"] = []
naming = g.setdefault("naming", {})
naming.setdefault("mode", "template_if_missing")
naming.setdefault("base_name", "Landmark")
naming.setdefault("template", "{base_name} - {hash}")
display = g.setdefault("display", {})
display.setdefault("label", g.get("id") or "Group")
display.setdefault("color", "#FB8C00")
display.setdefault("icon", "marker")
display.setdefault("background", "circle")
return cfg
def has_cyrillic(text: str) -> bool:
@@ -549,6 +596,96 @@ def render_name(it: Landmark, group: dict[str, Any], config: dict[str, Any]) ->
return sanitize_text(template.format(base_name=base, hash=short_hash, existing_name=existing))
return existing or sanitize_text(template.format(base_name=base, hash=short_hash, existing_name=existing))
def _prompt_config_value(label: str, current: str) -> Optional[str]:
    """Ask the user for one value, showing the current one in brackets.

    Returns the stripped reply (possibly empty, meaning "keep current"), or
    ``None`` when editing should stop: the user typed ``!`` or stdin hit EOF.
    """
    try:
        reply = input(f"{label} [{current}]: ").strip()
    except EOFError:
        # Ctrl-D / closed stdin: treat like '!' so edits made so far are kept
        # and can still be saved, instead of dying with a traceback.
        print(file=sys.stderr)
        return None
    return None if reply == '!' else reply


def _edit_group_interactively(g: dict[str, Any]) -> bool:
    """Prompt for the editable fields of one group, mutating *g* in place.

    Returns ``False`` as soon as the user asks to stop, ``True`` once every
    prompt for the group has been answered.
    """
    disp = g.setdefault('display', {})
    naming = g.setdefault('naming', {})
    pp = g.setdefault('postprocess', {})

    # Plain-text fields: an empty reply keeps the current value.
    for target, key in ((disp, 'label'), (naming, 'base_name'), (naming, 'template')):
        reply = _prompt_config_value(key, sanitize_text(target.get(key, '')))
        if reply is None:
            return False
        if reply:
            target[key] = sanitize_text(reply)

    current = json.dumps(pp.get('add_tags') or {}, ensure_ascii=False, sort_keys=True)
    reply = _prompt_config_value("postprocess.add_tags JSON", current)
    if reply is None:
        return False
    if reply:
        # Best effort: any problem with the typed JSON leaves the existing
        # add_tags untouched and only emits a warning.
        try:
            parsed = json.loads(reply)
            if not isinstance(parsed, dict):
                raise ValueError('JSON must be an object')
            cleaned_tags = {}
            for k, v in parsed.items():
                k2 = sanitize_text(k)
                if not k2:
                    continue
                v2 = sanitize_text(v)
                if v2:
                    cleaned_tags[k2] = v2
            pp['add_tags'] = cleaned_tags
        except Exception as e:
            print(f"[warn] invalid JSON ignored: {e}", file=sys.stderr)

    current = json.dumps(pp.get('remove_tags') or [], ensure_ascii=False)
    reply = _prompt_config_value("postprocess.remove_tags JSON", current)
    if reply is None:
        return False
    if reply:
        # Same best-effort policy as for add_tags above.
        try:
            parsed = json.loads(reply)
            if not isinstance(parsed, list):
                raise ValueError('JSON must be a list')
            cleaned_keys = []
            for v in parsed:
                v2 = sanitize_text(v)
                if v2:
                    cleaned_keys.append(v2)
            pp['remove_tags'] = cleaned_keys
        except Exception as e:
            print(f"[warn] invalid JSON ignored: {e}", file=sys.stderr)
    return True


def interactive_edit_config(config: dict[str, Any], *, save_path: Optional[Path] = None) -> dict[str, Any]:
    """Walk the user through each group's label, naming and postprocess tags.

    Args:
        config: Parsed config; it is never mutated. Edits are applied to a
            JSON round-trip copy.
        save_path: When given, the edited copy is written there with
            ``save_json`` after prompting ends (including an early stop).

    Returns:
        The edited copy. If stdin is not a TTY, a warning is printed and the
        original ``config`` object is returned unchanged (nothing is saved).

    For every prompt, pressing Enter keeps the current value. Typing ``!``,
    or sending EOF (Ctrl-D), stops editing the remaining fields and groups
    while keeping the edits made so far.
    """
    if not sys.stdin.isatty():
        print("[warn] interactive config editing requested but stdin is not interactive", file=sys.stderr)
        return config

    # Deep copy via JSON so the caller's config is left untouched.
    cfg = json.loads(json.dumps(config, ensure_ascii=False))
    print("\nInteractive config editor", file=sys.stderr)
    print("Press Enter to keep current value. Type ! to stop editing remaining groups.\n", file=sys.stderr)

    for g in cfg.get("groups") or []:
        # Show enough context (id, match keys, count, a few examples) for the
        # user to recognise the group before being asked about it.
        print(f"Group: {g.get('id')} priority={g.get('priority')}", file=sys.stderr)
        print(f" Match keys: {', '.join(g.get('match',{}).get('point_group_keys') or [])}", file=sys.stderr)
        stats = g.get('stats') or {}
        print(f" Count: {stats.get('count', 0)}", file=sys.stderr)
        for ex in (g.get('examples') or [])[:4]:
            print(f" - {ex.get('lon')},{ex.get('lat')} | {ex.get('name') or '<unnamed>'} | {ex.get('raw_type')}", file=sys.stderr)
        if not _edit_group_interactively(g):
            break
        print(file=sys.stderr)

    if save_path:
        save_json(save_path, cfg)
        print(f"[info] wrote edited config: {save_path}", file=sys.stderr)
    return cfg
def apply_postprocess(props: dict[str, str], group: dict[str, Any], *, base_name: str, hash_value: str, existing_name: str) -> dict[str, str]:
    """Return a copy of *props* with the group's postprocess rules applied.

    Rules are read from ``group['postprocess']``:

    * ``remove_tags`` - keys to drop from the copy (missing keys are ignored).
    * ``add_tags`` - mapping of key to value template; each template is
      expanded with ``render_template_value`` and the result is set on the
      copy. Entries whose sanitized key or rendered value is empty are
      skipped.

    Removals run before additions, so a key listed in both ends up with the
    added value. *props* itself is not modified.
    """
    rules = group.get('postprocess') or {}
    result = dict(props)

    for raw_key in rules.get('remove_tags') or []:
        tag_key = sanitize_text(raw_key)
        if tag_key:
            result.pop(tag_key, None)

    additions = rules.get('add_tags') or {}
    group_id = str(group.get('id') or '')
    for raw_key, value_template in additions.items():
        tag_key = sanitize_text(raw_key)
        if tag_key:
            rendered = render_template_value(
                value_template,
                base_name=base_name,
                hash_value=hash_value,
                existing_name=existing_name,
                group_id=group_id,
            )
            if rendered:
                result[tag_key] = rendered
    return result
def build_records(items: list[Landmark], config: dict[str, Any]) -> tuple[list[dict[str, Any]], list[Landmark]]:
out = []
unmatched: list[Landmark] = []
@@ -569,6 +706,7 @@ def build_records(items: list[Landmark], config: dict[str, Any]) -> tuple[list[d
props["source:point_group"] = group.get("id") or ""
props["source:point_group_key"] = it.point_group_key
props["source:stable_hash"] = short_hash
props = apply_postprocess(props, group, base_name=str((group.get("naming") or {}).get("base_name") or "Landmark"), hash_value=short_hash, existing_name=it.name)
if it.garmin_image_id:
props["garmin:image_id"] = str(it.garmin_image_id)
if it.garmin_image_file:
@@ -709,6 +847,10 @@ def main(argv=None):
a.add_argument("--group-examples", type=int, default=4)
a.add_argument("--locale-hint", choices=["bg","en"])
e = sub.add_parser("edit-config", help="Interactively edit names/styles/postprocess rules inside a config")
e.add_argument("--config", required=True, type=Path)
e.add_argument("--output", type=Path, help="Write edited config here; defaults to in-place update")
b = sub.add_parser("build", help="Build resolved outputs from CSVs and a JSON config")
b.add_argument("csv", nargs="+", type=Path)
b.add_argument("--config", required=True, type=Path)
@@ -717,10 +859,18 @@ def main(argv=None):
b.add_argument("--gpx", type=Path)
b.add_argument("--summary-json", type=Path)
b.add_argument("--unmatched-csv", type=Path, help="Write unmatched input landmarks for debugging config misses")
b.add_argument("--interactive-edit-config", action="store_true", help="Review/edit naming and postprocess tags before build")
b.add_argument("--edited-config-out", type=Path, help="When using --interactive-edit-config, save the reviewed config here")
b.add_argument("--dedupe-mode", choices=["coord","coord_name","coord_type","coord_name_type","off"])
b.add_argument("--dedupe-radius-m", type=float)
args = ap.parse_args(argv)
if args.cmd == "edit-config":
cfg = migrate_config(load_json(args.config))
output = args.output or args.config
interactive_edit_config(cfg, save_path=output)
return 0
items = load_landmarks(args.csv)
if args.cmd == "analyze":
items = dedupe(items, radius_m=args.dedupe_radius_m, mode=args.dedupe_mode)
@@ -740,6 +890,8 @@ def main(argv=None):
return 0
cfg = migrate_config(load_json(args.config))
if args.interactive_edit_config:
cfg = interactive_edit_config(cfg, save_path=args.edited_config_out)
defaults = cfg.get("defaults") or {}
dedupe_cfg = defaults.get("dedupe") or {}
mode = args.dedupe_mode or dedupe_cfg.get("mode", "coord")