diff --git a/stage-2-parse-stage-1/landmarks_csv_to_osmand.py b/stage-2-parse-stage-1/landmarks_csv_to_osmand.py index a82adfa..4ade7f6 100644 --- a/stage-2-parse-stage-1/landmarks_csv_to_osmand.py +++ b/stage-2-parse-stage-1/landmarks_csv_to_osmand.py @@ -11,6 +11,8 @@ import math import re import sys import subprocess +import zipfile +import html import xml.etree.ElementTree as ET from collections import Counter, defaultdict from dataclasses import dataclass, field @@ -19,7 +21,9 @@ from typing import Any, Iterable, Optional OSMAND_NS = "https://osmand.net" GPX_NS = "http://www.topografix.com/GPX/1/1" +KML_NS = "http://www.opengis.net/kml/2.2" ET.register_namespace("osmand", OSMAND_NS) +ET.register_namespace("", KML_NS) EARTH_M_PER_DEG_LAT = 111_320.0 _XML_INVALID_RE = re.compile(r"[\x00-\x08\x0B\x0C\x0E-\x1F\uD800-\uDFFF\uFFFE\uFFFF]") @@ -986,6 +990,176 @@ def write_gpx(records: list[dict[str, Any]], path: Path): with path.open("wb") as f: tree.write(f, encoding="utf-8", xml_declaration=True) + + +def kml_color(value: object, alpha: str = "ff") -> str: + """Convert #RRGGBB / RRGGBB / #AARRGGBB to KML aabbggrr.""" + s = sanitize_text(value).strip() + if not s: + s = "#FB8C00" + if s.startswith("#"): + s = s[1:] + s = re.sub(r"[^0-9A-Fa-f]", "", s) + if len(s) == 8: + # Input treated as AARRGGBB. 
+ aa, rr, gg, bb = s[0:2], s[2:4], s[4:6], s[6:8] + elif len(s) == 6: + aa, rr, gg, bb = alpha, s[0:2], s[2:4], s[4:6] + else: + aa, rr, gg, bb = alpha, "FB", "8C", "00" + return f"{aa}{bb}{gg}{rr}".lower() + + +def kml_id(value: object) -> str: + s = sanitize_text(value).strip() or "id" + s = re.sub(r"[^A-Za-z0-9_\-.]+", "_", s) + if not re.match(r"[A-Za-z_]", s): + s = "x_" + s + return s[:120] + + +def safe_filename(value: object, default: str = "group") -> str: + s = sanitize_text(value).strip() or default + s = re.sub(r"[^A-Za-z0-9_\-.\u0400-\u04FF]+", "_", s) + s = s.strip("._ ") or default + return s[:120] + + +def kml_icon_href(icon: str) -> str: + # These are remote Google Earth built-in-ish icon URLs. Color is controlled by IconStyle color. + i = sanitize_text(icon).strip().lower() + if i in {"water_drop", "water", "drinking_water"}: + return "http://maps.google.com/mapfiles/kml/paddle/wht-circle.png" + if i in {"special_star", "star", "peak", "summit"}: + return "http://maps.google.com/mapfiles/kml/paddle/wht-stars.png" + if i in {"town", "city", "village"}: + return "http://maps.google.com/mapfiles/kml/paddle/wht-blank.png" + return "http://maps.google.com/mapfiles/kml/paddle/wht-blank.png" + + +def html_description(record: dict[str, Any], max_props: int = 80) -> str: + props = record.get("props") or {} + rows = [] + rows.append(f"

{html.escape(sanitize_text(record.get('name', '')))}

") + rows.append("") + basics = { + "group_id": record.get("group_id", ""), + "group_label": record.get("group_label", ""), + "stable_hash": record.get("short_hash", ""), + "lat": f"{record.get('lat', 0):.8f}", + "lon": f"{record.get('lon', 0):.8f}", + "duplicate_count": record.get("duplicate_count", ""), + } + for k, v in basics.items(): + rows.append(f"") + rows.append("
{html.escape(str(k))}{html.escape(sanitize_text(v))}
") + rows.append("
") + for idx, (k, v) in enumerate(sorted(props.items())): + if idx >= max_props: + rows.append(f"") + break + rows.append(f"") + rows.append("
... {len(props)-max_props} more properties
{html.escape(sanitize_text(k))}{html.escape(sanitize_text(v))}
") + return "\n".join(rows) + + +def write_kml( + records: list[dict[str, Any]], + path: Path, + *, + document_name: Optional[str] = None, + folder_visibility: int = 1, + folder_open: int = 0, + label_scale: float = 0.0, + icon_scale: float = 1.1, + include_extended_data: bool = True, +) -> None: + kml = ET.Element(f"{{{KML_NS}}}kml") + doc = ET.SubElement(kml, f"{{{KML_NS}}}Document") + ET.SubElement(doc, f"{{{KML_NS}}}name").text = sanitize_text(document_name or path.stem) + ET.SubElement(doc, f"{{{KML_NS}}}open").text = "1" + + grouped: dict[str, list[dict[str, Any]]] = defaultdict(list) + group_meta: dict[str, dict[str, Any]] = {} + for r in records: + gid = sanitize_text(r.get("group_id") or "ungrouped") + grouped[gid].append(r) + group_meta.setdefault(gid, r) + + for gid, sample in sorted(group_meta.items(), key=lambda kv: sanitize_text(kv[1].get("group_label") or kv[0]).casefold()): + sid = "style_" + kml_id(gid) + style = ET.SubElement(doc, f"{{{KML_NS}}}Style", {"id": sid}) + icon_style = ET.SubElement(style, f"{{{KML_NS}}}IconStyle") + ET.SubElement(icon_style, f"{{{KML_NS}}}color").text = kml_color(sample.get("color")) + ET.SubElement(icon_style, f"{{{KML_NS}}}scale").text = f"{icon_scale:g}" + icon = ET.SubElement(icon_style, f"{{{KML_NS}}}Icon") + ET.SubElement(icon, f"{{{KML_NS}}}href").text = kml_icon_href(str(sample.get("icon") or "marker")) + label_style = ET.SubElement(style, f"{{{KML_NS}}}LabelStyle") + ET.SubElement(label_style, f"{{{KML_NS}}}color").text = kml_color(sample.get("color")) + ET.SubElement(label_style, f"{{{KML_NS}}}scale").text = f"{label_scale:g}" + + for gid, group_records in sorted(grouped.items(), key=lambda kv: sanitize_text(group_meta[kv[0]].get("group_label") or kv[0]).casefold()): + sample = group_meta[gid] + folder = ET.SubElement(doc, f"{{{KML_NS}}}Folder") + ET.SubElement(folder, f"{{{KML_NS}}}name").text = f"{sanitize_text(sample.get('group_label') or gid)} ({len(group_records)})" + ET.SubElement(folder, 
f"{{{KML_NS}}}visibility").text = "1" if folder_visibility else "0" + ET.SubElement(folder, f"{{{KML_NS}}}open").text = "1" if folder_open else "0" + for r in group_records: + pm = ET.SubElement(folder, f"{{{KML_NS}}}Placemark") + ET.SubElement(pm, f"{{{KML_NS}}}name").text = sanitize_text(r.get("name") or r.get("short_hash") or "Point") + ET.SubElement(pm, f"{{{KML_NS}}}styleUrl").text = "#style_" + kml_id(gid) + ET.SubElement(pm, f"{{{KML_NS}}}description").text = html_description(r) + if include_extended_data: + ext = ET.SubElement(pm, f"{{{KML_NS}}}ExtendedData") + for k, v in sorted((r.get("props") or {}).items()): + data = ET.SubElement(ext, f"{{{KML_NS}}}Data", {"name": sanitize_text(k)}) + ET.SubElement(data, f"{{{KML_NS}}}value").text = sanitize_text(v) + data = ET.SubElement(ext, f"{{{KML_NS}}}Data", {"name": "stage2:group_id"}) + ET.SubElement(data, f"{{{KML_NS}}}value").text = sanitize_text(r.get("group_id")) + data = ET.SubElement(ext, f"{{{KML_NS}}}Data", {"name": "stage2:stable_hash"}) + ET.SubElement(data, f"{{{KML_NS}}}value").text = sanitize_text(r.get("short_hash")) + point = ET.SubElement(pm, f"{{{KML_NS}}}Point") + ET.SubElement(point, f"{{{KML_NS}}}coordinates").text = f"{float(r['lon']):.8f},{float(r['lat']):.8f},0" + + tree = ET.ElementTree(kml) + try: + ET.indent(tree, space=" ") + except Exception: + pass + with path.open("wb") as f: + tree.write(f, encoding="utf-8", xml_declaration=True) + + +def write_kmz(records: list[dict[str, Any]], path: Path, **kwargs) -> None: + import tempfile + with tempfile.TemporaryDirectory() as td: + tmp = Path(td) / "doc.kml" + write_kml(records, tmp, document_name=kwargs.pop("document_name", path.stem), **kwargs) + with zipfile.ZipFile(path, "w", compression=zipfile.ZIP_DEFLATED) as zf: + zf.write(tmp, "doc.kml") + + +def write_kml_split_dir(records: list[dict[str, Any]], outdir: Path, *, kmz: bool = False, **kwargs) -> None: + outdir.mkdir(parents=True, exist_ok=True) + grouped: dict[str, list[dict[str, 
Any]]] = defaultdict(list) + labels: dict[str, str] = {} + for r in records: + gid = sanitize_text(r.get("group_id") or "ungrouped") + grouped[gid].append(r) + labels.setdefault(gid, sanitize_text(r.get("group_label") or gid)) + manifest = [] + base_kwargs = dict(kwargs) + base_kwargs.pop("document_name", None) + for gid, recs in sorted(grouped.items(), key=lambda kv: labels.get(kv[0], kv[0]).casefold()): + stem = safe_filename(f"{labels.get(gid, gid)}__{gid}") + path = outdir / f"{stem}.{'kmz' if kmz else 'kml'}" + if kmz: + write_kmz(recs, path, document_name=labels.get(gid, gid), **base_kwargs) + else: + write_kml(recs, path, document_name=labels.get(gid, gid), **base_kwargs) + manifest.append({"group_id": gid, "group_label": labels.get(gid, gid), "count": len(recs), "file": path.name}) + save_json(outdir / "manifest.json", {"groups": manifest, "file_type": "kmz" if kmz else "kml"}) + def main(argv=None): ap = argparse.ArgumentParser(description="Stage 2 packager: analyze landmark CSVs into configurable group definitions, then build OSM/GPX with stable hashed names and styles.") sub = ap.add_subparsers(dest="cmd", required=True) @@ -1013,6 +1187,16 @@ def main(argv=None): b.add_argument("--resolved-csv", type=Path) b.add_argument("--osm", type=Path) b.add_argument("--gpx", type=Path) + b.add_argument("--kml", type=Path, help="Write Google Earth KML with one toggleable Folder per configured group") + b.add_argument("--kmz", type=Path, help="Write Google Earth KMZ with one toggleable Folder per configured group") + b.add_argument("--kml-split-dir", type=Path, help="Write one KML per group plus manifest.json for selective loading") + b.add_argument("--kmz-split-dir", type=Path, help="Write one KMZ per group plus manifest.json for selective loading") + b.add_argument("--kml-document-name", help="Document name shown in Google Earth") + b.add_argument("--kml-folder-visibility", type=int, choices=[0,1], default=1, help="Initial visibility for group folders") + 
b.add_argument("--kml-folder-open", type=int, choices=[0,1], default=0, help="Initial expanded/collapsed state for group folders") + b.add_argument("--kml-label-scale", type=float, default=0.0, help="Google Earth label scale; 0 hides always-on labels for performance") + b.add_argument("--kml-icon-scale", type=float, default=1.1) + b.add_argument("--kml-no-extended-data", action="store_true", help="Do not write per-point ExtendedData properties") b.add_argument("--summary-json", type=Path) b.add_argument("--unmatched-csv", type=Path, help="Write unmatched input landmarks for debugging config misses") b.add_argument("--interactive-edit-config", action="store_true", help="Review/edit naming and postprocess tags before build") @@ -1060,7 +1244,7 @@ def main(argv=None): radius = args.dedupe_radius_m if args.dedupe_radius_m is not None else float(dedupe_cfg.get("radius_m", 12.0)) items = dedupe(items, radius_m=radius, mode=mode) records, unmatched = build_records(items, cfg) - if not any([args.resolved_csv, args.osm, args.gpx, args.summary_json, args.unmatched_csv]): + if not any([args.resolved_csv, args.osm, args.gpx, args.kml, args.kmz, args.kml_split_dir, args.kmz_split_dir, args.summary_json, args.unmatched_csv]): print(f"[info] built {len(records)} records") print(f"[info] unmatched {len(unmatched)} landmarks") return 0 @@ -1073,6 +1257,26 @@ def main(argv=None): if args.gpx: write_gpx(records, args.gpx) print(f"[info] wrote GPX: {args.gpx}") + kml_kwargs = { + "document_name": args.kml_document_name, + "folder_visibility": args.kml_folder_visibility, + "folder_open": args.kml_folder_open, + "label_scale": args.kml_label_scale, + "icon_scale": args.kml_icon_scale, + "include_extended_data": not args.kml_no_extended_data, + } + if args.kml: + write_kml(records, args.kml, **kml_kwargs) + print(f"[info] wrote Google Earth KML: {args.kml}") + if args.kmz: + write_kmz(records, args.kmz, **kml_kwargs) + print(f"[info] wrote Google Earth KMZ: {args.kmz}") + if 
args.kml_split_dir: + write_kml_split_dir(records, args.kml_split_dir, kmz=False, **kml_kwargs) + print(f"[info] wrote per-group Google Earth KML files: {args.kml_split_dir}") + if args.kmz_split_dir: + write_kml_split_dir(records, args.kmz_split_dir, kmz=True, **kml_kwargs) + print(f"[info] wrote per-group Google Earth KMZ files: {args.kmz_split_dir}") if args.unmatched_csv: write_unmatched_csv(unmatched, args.unmatched_csv) print(f"[info] wrote unmatched CSV: {args.unmatched_csv}") diff --git a/stage-2-parse-stage-1/readme.md b/stage-2-parse-stage-1/readme.md index 5e5c44e..a76aac1 100644 --- a/stage-2-parse-stage-1/readme.md +++ b/stage-2-parse-stage-1/readme.md @@ -90,3 +90,92 @@ That gives you exactly the pattern you asked for: * keep everything in the same config file The next strong upgrade would be adding a dedicated `--run-cmd-before-eval-on-enter/--no-auto-open` toggle, so you can choose whether each group auto-opens the first example or only opens when you press `o`. + +Implemented it in stage 2. + +Download the upgraded script here: + +[landmarks_csv_to_osmand_google_earth.py](sandbox:/mnt/data/landmarks_csv_to_osmand_google_earth.py) + +I added Google Earth output on top of the existing stage-2 config workflow. Stage 1 already exports the needed `point_group_key`, raw Garmin type, semantic JSON, image metadata, and coordinates into CSV, so stage 2 can reuse that directly. The existing stage-2 logic already resolves records through the config, applying group assignment, naming, style, postprocess tags, and stable hashes, so I hooked Google Earth export after that same resolved-record stage. 
+ +New outputs: + +```bash +--kml output.kml +--kmz output.kmz +--kml-split-dir google-earth-kml-groups +--kmz-split-dir google-earth-kmz-groups +``` + +Each KML/KMZ contains: + +* one Google Earth **Folder per configured group** +* toggleable components in the Google Earth sidebar +* per-group KML styles generated from your config color/icon +* point name from your stage-2 naming logic +* description table with Garmin/source/hash/config metadata +* `ExtendedData` for machine-readable properties + +Example build: + +```bat +python landmarks_csv_to_osmand_google_earth.py build parsed-landmarks/csv/*.csv ^ + --config stage2_config.json ^ + --kml bgmountains_landmarks.kml ^ + --kmz bgmountains_landmarks.kmz ^ + --resolved-csv resolved.csv ^ + --summary-json build_summary.json +``` + +For large datasets, use this instead: + +```bat +python landmarks_csv_to_osmand_google_earth.py build parsed-landmarks/csv/*.csv ^ + --config stage2_config.json ^ + --kmz-split-dir google-earth-groups ^ + --kml-folder-visibility 0 ^ + --kml-label-scale 0 +``` + +That creates one `.kmz` per group plus: + +```text +manifest.json +``` + +This is better for Google Earth when you have tens of thousands of points, because you can load only the group you are inspecting. + +Useful options added: + +```bash +--kml-folder-visibility 0 +``` + +Starts all folders hidden, so you enable groups manually in Google Earth. + +```bash +--kml-folder-open 0 +``` + +Keeps folders collapsed. + +```bash +--kml-label-scale 0 +``` + +Hides always-on labels. This is the default because 100k labels will destroy usability. + +```bash +--kml-label-scale 0.7 +``` + +Shows labels if you want them. + +```bash +--kml-no-extended-data +``` + +Makes smaller KML/KMZ files by removing the full property dump. + +The KML export uses the same config-controlled naming/postprocessing path as your existing OSM/GPX generation. 
The previous stage-2 build command already supported OSM, GPX, resolved CSV, and unmatched CSV outputs; I added KML/KMZ into that same build output branch.