From 929d3924046ef807c50d6c374fecb2f79029e20c Mon Sep 17 00:00:00 2001
From: nq
Date: Thu, 16 Apr 2026 04:54:42 -0700
Subject: [PATCH] stage-2-v3-configurable-v3

Add per-group postprocess rules and an interactive config editor.

* analyze: seed each group's postprocess.add_tags from the semantic
  class inferred for the group's first member; remove_tags starts empty.
* migrate_config: backfill postprocess, naming and display defaults so
  older configs load without missing keys.
* build: after the source:* properties are set, drop remove_tags and
  then write add_tags. Tag values are str.format templates that may use
  {base_name}, {hash}, {existing_name} and {group_id}.
* New "edit-config" subcommand, plus "build --interactive-edit-config"
  and "--edited-config-out", to review label, base_name, template and
  postprocess tags group by group.
* Editor prompts treat end-of-input (Ctrl-D) like "!", so edits made
  before that point are still saved and returned instead of being lost
  to an EOFError traceback.
---
 .../landmarks_csv_to_osmand.py | 168 +++++++++++++++++-
 1 file changed, 166 insertions(+), 2 deletions(-)

diff --git a/stage-2-parse-stage-1/landmarks_csv_to_osmand.py b/stage-2-parse-stage-1/landmarks_csv_to_osmand.py
index 6118810..ca797e6 100644
--- a/stage-2-parse-stage-1/landmarks_csv_to_osmand.py
+++ b/stage-2-parse-stage-1/landmarks_csv_to_osmand.py
@@ -290,6 +290,36 @@ def infer_style_and_name(it: Landmark, locale: str) -> dict[str, Any]:
         "display": style,
     }
 
+
+def inferred_postprocess_tags(it: Landmark) -> dict[str, str]:
+    """Map the landmark's inferred semantic class to default add_tags ({} if none applies)."""
+    sem_class = infer_semantic_class(it)
+    if sem_class == "drinking_water":
+        return {"amenity": "drinking_water"}
+    if sem_class == "spring":
+        return {"natural": "spring"}
+    if sem_class == "peak":
+        return {"natural": "peak"}
+    if sem_class == "village":
+        return {"place": "village"}
+    if sem_class == "town":
+        return {"place": "town"}
+    if sem_class == "city":
+        return {"place": "city"}
+    if sem_class == "locality":
+        return {"place": "locality"}
+    return {}
+
+
+def render_template_value(template: Any, *, base_name: str, hash_value: str, existing_name: str, group_id: str) -> str:
+    """Fill a tag-value template with the naming placeholders and sanitize the result."""
+    return sanitize_text(str(template or '').format(
+        base_name=base_name,
+        hash=hash_value,
+        existing_name=existing_name,
+        group_id=group_id,
+    ))
+
 def spread_examples(items: list[Landmark], n: int) -> list[dict[str, Any]]:
     ordered = sorted(items, key=lambda it: (it.mapset, round(it.lon,6), round(it.lat,6), it.name))
     if not ordered or n <= 0:
@@ -394,6 +424,10 @@ def analyze_to_config(items: list[Landmark], example_count: int, locale_hint: Op
                 "preserve_existing_names": True,
                 "append_hash_when_named": False,
             },
+            "postprocess": {
+                "add_tags": inferred_postprocess_tags(first),
+                "remove_tags": [],
+            },
             "hash": dict(DEFAULT_HASH),
             "stats": {
                 "count": len(members),
@@ -443,8 +477,7 @@ def load_json(path: Path) -> Any:
     return json.loads(path.read_text(encoding="utf-8"))
 
 def migrate_config(config: dict[str, Any]) -> dict[str, Any]:
-    """Normalize older autoconfigs that accidentally promoted sparse semantic
-    tags into required match constraints for raw symbol groups."""
+    """Normalize older configs and ensure postprocess hooks exist."""
     cfg = json.loads(json.dumps(config, ensure_ascii=False))
     for g in cfg.get("groups") or []:
         match = g.setdefault("match", {})
@@ -465,6 +498,22 @@ def migrate_config(config: dict[str, Any]) -> dict[str, Any]:
                 keep_req.append(kv)
         match["semantic_required_any"] = keep_req
         match["semantic_preferred_any"] = preferred
+        pp = g.setdefault("postprocess", {})
+        add_tags = pp.get("add_tags")
+        if not isinstance(add_tags, dict):
+            pp["add_tags"] = {}
+        remove_tags = pp.get("remove_tags")
+        if not isinstance(remove_tags, list):
+            pp["remove_tags"] = []
+        naming = g.setdefault("naming", {})
+        naming.setdefault("mode", "template_if_missing")
+        naming.setdefault("base_name", "Landmark")
+        naming.setdefault("template", "{base_name} - {hash}")
+        display = g.setdefault("display", {})
+        display.setdefault("label", g.get("id") or "Group")
+        display.setdefault("color", "#FB8C00")
+        display.setdefault("icon", "marker")
+        display.setdefault("background", "circle")
     return cfg
 
 def has_cyrillic(text: str) -> bool:
@@ -549,6 +598,106 @@ def render_name(it: Landmark, group: dict[str, Any], config: dict[str, Any]) ->
         return sanitize_text(template.format(base_name=base, hash=short_hash, existing_name=existing))
     return existing or sanitize_text(template.format(base_name=base, hash=short_hash, existing_name=existing))
 
+def _prompt_reply(prompt: str) -> str:
+    """Read one stripped reply; treat end-of-input (Ctrl-D) as the '!' stop command."""
+    try:
+        return input(prompt).strip()
+    except EOFError:
+        print(file=sys.stderr)
+        return '!'
+
+def interactive_edit_config(config: dict[str, Any], *, save_path: Optional[Path] = None) -> dict[str, Any]:
+    """Prompt on a TTY for per-group label, naming and postprocess edits; return the edited copy."""
+    if not sys.stdin.isatty():
+        print("[warn] interactive config editing requested but stdin is not interactive", file=sys.stderr)
+        return config
+    cfg = json.loads(json.dumps(config, ensure_ascii=False))
+    groups = cfg.get("groups") or []
+    print("\nInteractive config editor", file=sys.stderr)
+    print("Press Enter to keep current value. Type ! to stop editing remaining groups.\n", file=sys.stderr)
+    stop = False
+    for g in groups:
+        if stop:
+            break
+        print(f"Group: {g.get('id')} priority={g.get('priority')}", file=sys.stderr)
+        print(f" Match keys: {', '.join(g.get('match',{}).get('point_group_keys') or [])}", file=sys.stderr)
+        stats = g.get('stats') or {}
+        print(f" Count: {stats.get('count', 0)}", file=sys.stderr)
+        for ex in (g.get('examples') or [])[:4]:
+            print(f" - {ex.get('lon')},{ex.get('lat')} | {ex.get('name') or ''} | {ex.get('raw_type')}", file=sys.stderr)
+        disp = g.setdefault('display', {})
+        naming = g.setdefault('naming', {})
+        pp = g.setdefault('postprocess', {})
+        current = sanitize_text(disp.get('label', ''))
+        reply = _prompt_reply(f"label [{current}]: ")
+        if reply == '!':
+            stop = True
+            continue
+        if reply:
+            disp['label'] = sanitize_text(reply)
+        current = sanitize_text(naming.get('base_name', ''))
+        reply = _prompt_reply(f"base_name [{current}]: ")
+        if reply == '!':
+            stop = True
+            continue
+        if reply:
+            naming['base_name'] = sanitize_text(reply)
+        current = sanitize_text(naming.get('template', ''))
+        reply = _prompt_reply(f"template [{current}]: ")
+        if reply == '!':
+            stop = True
+            continue
+        if reply:
+            naming['template'] = sanitize_text(reply)
+        current = json.dumps(pp.get('add_tags') or {}, ensure_ascii=False, sort_keys=True)
+        reply = _prompt_reply(f"postprocess.add_tags JSON [{current}]: ")
+        if reply == '!':
+            stop = True
+            continue
+        if reply:
+            try:
+                parsed = json.loads(reply)
+                if not isinstance(parsed, dict):
+                    raise ValueError('JSON must be an object')
+                pp['add_tags'] = {sanitize_text(k): sanitize_text(v) for k, v in parsed.items() if sanitize_text(k) and sanitize_text(v)}
+            except Exception as e:
+                print(f"[warn] invalid JSON ignored: {e}", file=sys.stderr)
+        current = json.dumps(pp.get('remove_tags') or [], ensure_ascii=False)
+        reply = _prompt_reply(f"postprocess.remove_tags JSON [{current}]: ")
+        if reply == '!':
+            stop = True
+            continue
+        if reply:
+            try:
+                parsed = json.loads(reply)
+                if not isinstance(parsed, list):
+                    raise ValueError('JSON must be a list')
+                pp['remove_tags'] = [sanitize_text(v) for v in parsed if sanitize_text(v)]
+            except Exception as e:
+                print(f"[warn] invalid JSON ignored: {e}", file=sys.stderr)
+        print(file=sys.stderr)
+    if save_path:
+        save_json(save_path, cfg)
+        print(f"[info] wrote edited config: {save_path}", file=sys.stderr)
+    return cfg
+
+def apply_postprocess(props: dict[str, str], group: dict[str, Any], *, base_name: str, hash_value: str, existing_name: str) -> dict[str, str]:
+    """Return a copy of props with the group's remove_tags dropped, then add_tags rendered in."""
+    out = dict(props)
+    pp = group.get('postprocess') or {}
+    for k in pp.get('remove_tags') or []:
+        k2 = sanitize_text(k)
+        if k2:
+            out.pop(k2, None)
+    for k, v in (pp.get('add_tags') or {}).items():
+        k2 = sanitize_text(k)
+        if not k2:
+            continue
+        v2 = render_template_value(v, base_name=base_name, hash_value=hash_value, existing_name=existing_name, group_id=str(group.get('id') or ''))
+        if v2:
+            out[k2] = v2
+    return out
+
 def build_records(items: list[Landmark], config: dict[str, Any]) -> tuple[list[dict[str, Any]], list[Landmark]]:
     out = []
     unmatched: list[Landmark] = []
@@ -569,6 +718,7 @@ def build_records(items: list[Landmark], config: dict[str, Any]) -> tuple[list[d
         props["source:point_group"] = group.get("id") or ""
         props["source:point_group_key"] = it.point_group_key
         props["source:stable_hash"] = short_hash
+        props = apply_postprocess(props, group, base_name=str((group.get("naming") or {}).get("base_name") or "Landmark"), hash_value=short_hash, existing_name=it.name)
         if it.garmin_image_id:
             props["garmin:image_id"] = str(it.garmin_image_id)
         if it.garmin_image_file:
@@ -709,6 +859,10 @@ def main(argv=None):
     a.add_argument("--group-examples", type=int, default=4)
     a.add_argument("--locale-hint", choices=["bg","en"])
 
+    e = sub.add_parser("edit-config", help="Interactively edit names/styles/postprocess rules inside a config")
+    e.add_argument("--config", required=True, type=Path)
+    e.add_argument("--output", type=Path, help="Write edited config here; defaults to in-place update")
+
     b = sub.add_parser("build", help="Build resolved outputs from CSVs and a JSON config")
     b.add_argument("csv", nargs="+", type=Path)
     b.add_argument("--config", required=True, type=Path)
@@ -717,10 +871,18 @@ def main(argv=None):
     b.add_argument("--gpx", type=Path)
     b.add_argument("--summary-json", type=Path)
     b.add_argument("--unmatched-csv", type=Path, help="Write unmatched input landmarks for debugging config misses")
+    b.add_argument("--interactive-edit-config", action="store_true", help="Review/edit naming and postprocess tags before build")
+    b.add_argument("--edited-config-out", type=Path, help="When using --interactive-edit-config, save the reviewed config here")
     b.add_argument("--dedupe-mode", choices=["coord","coord_name","coord_type","coord_name_type","off"])
     b.add_argument("--dedupe-radius-m", type=float)
 
     args = ap.parse_args(argv)
+    if args.cmd == "edit-config":
+        cfg = migrate_config(load_json(args.config))
+        output = args.output or args.config
+        interactive_edit_config(cfg, save_path=output)
+        return 0
+
     items = load_landmarks(args.csv)
     if args.cmd == "analyze":
         items = dedupe(items, radius_m=args.dedupe_radius_m, mode=args.dedupe_mode)
@@ -740,6 +902,8 @@ def main(argv=None):
         return 0
 
     cfg = migrate_config(load_json(args.config))
+    if args.interactive_edit_config:
+        cfg = interactive_edit_config(cfg, save_path=args.edited_config_out)
     defaults = cfg.get("defaults") or {}
     dedupe_cfg = defaults.get("dedupe") or {}
     mode = args.dedupe_mode or dedupe_cfg.get("mode", "coord")