diff --git a/stage-2-parse-stage-1/landmarks_csv_to_osmand.py b/stage-2-parse-stage-1/landmarks_csv_to_osmand.py
index a82adfa..4ade7f6 100644
--- a/stage-2-parse-stage-1/landmarks_csv_to_osmand.py
+++ b/stage-2-parse-stage-1/landmarks_csv_to_osmand.py
@@ -11,6 +11,8 @@ import math
import re
import sys
import subprocess
+import zipfile
+import html
import xml.etree.ElementTree as ET
from collections import Counter, defaultdict
from dataclasses import dataclass, field
@@ -19,7 +21,9 @@ from typing import Any, Iterable, Optional
OSMAND_NS = "https://osmand.net"
GPX_NS = "http://www.topografix.com/GPX/1/1"
+KML_NS = "http://www.opengis.net/kml/2.2"
ET.register_namespace("osmand", OSMAND_NS)
+ET.register_namespace("", KML_NS)
EARTH_M_PER_DEG_LAT = 111_320.0
_XML_INVALID_RE = re.compile(r"[\x00-\x08\x0B\x0C\x0E-\x1F\uD800-\uDFFF\uFFFE\uFFFF]")
@@ -986,6 +990,176 @@ def write_gpx(records: list[dict[str, Any]], path: Path):
with path.open("wb") as f:
tree.write(f, encoding="utf-8", xml_declaration=True)
+
+
+def kml_color(value: object, alpha: str = "ff") -> str:
+    """Translate an RGB-style hex color into KML's ``aabbggrr`` byte order.
+
+    Accepts ``#RRGGBB``, ``RRGGBB`` or ``#AARRGGBB``. One leading ``#`` is
+    dropped and every remaining non-hex character is discarded before the
+    length is checked. Empty input, or input that does not reduce to exactly
+    6 or 8 hex digits, yields the default orange ``FB8C00`` with ``alpha``.
+
+    Args:
+        value: Color text; passed through ``sanitize_text`` first.
+        alpha: Two-digit hex alpha used when the input carries none.
+
+    Returns:
+        Lower-case ``aabbggrr`` string.
+    """
+    text = sanitize_text(value).strip() or "#FB8C00"
+    digits = re.sub(r"[^0-9A-Fa-f]", "", text[1:] if text.startswith("#") else text)
+    if len(digits) == 8:
+        # Eight digits are read as AARRGGBB, so the embedded alpha wins.
+        aa, rgb = digits[:2], digits[2:]
+    elif len(digits) == 6:
+        aa, rgb = alpha, digits
+    else:
+        aa, rgb = alpha, "FB8C00"
+    rr, gg, bb = rgb[0:2], rgb[2:4], rgb[4:6]
+    # KML stores channels in the reverse order of the usual web notation.
+    return f"{aa}{bb}{gg}{rr}".lower()
+
+
+def kml_id(value: object) -> str:
+    """Build an identifier usable in KML ``id`` attributes from arbitrary text.
+
+    Runs of characters outside ``A-Z a-z 0-9 _ - .`` collapse to a single
+    ``_``; ``x_`` is prepended when the first character is not a letter or
+    underscore; the result is cut to 120 characters. Empty input maps to
+    ``"id"``.
+    """
+    ident = re.sub(r"[^A-Za-z0-9_\-.]+", "_", sanitize_text(value).strip() or "id")
+    prefix = "" if re.match(r"[A-Za-z_]", ident) else "x_"
+    return (prefix + ident)[:120]
+
+
+def safe_filename(value: object, default: str = "group") -> str:
+    """Reduce arbitrary text to a conservative file-name stem.
+
+    Keeps ASCII letters, digits, ``_``, ``-``, ``.`` and the Cyrillic block
+    (U+0400-U+04FF); every other run of characters becomes one ``_``.
+    Leading/trailing dots, underscores and spaces are trimmed, ``default`` is
+    used when nothing is left, and the result is cut to 120 characters.
+    """
+    raw = sanitize_text(value).strip() or default
+    collapsed = re.sub(r"[^A-Za-z0-9_\-.\u0400-\u04FF]+", "_", raw)
+    trimmed = collapsed.strip("._ ")
+    if not trimmed:
+        trimmed = default
+    return trimmed[:120]
+
+
+def kml_icon_href(icon: str) -> str:
+    """Map a configured icon name to a Google "paddle" icon URL.
+
+    The icons are white so that the final color comes from the IconStyle
+    ``color`` element. Matching is case-insensitive; unknown names fall back
+    to the blank paddle.
+    """
+    key = sanitize_text(icon).strip().lower()
+    # (accepted names, icon URL) pairs, checked in order.
+    choices = (
+        ({"water_drop", "water", "drinking_water"}, "http://maps.google.com/mapfiles/kml/paddle/wht-circle.png"),
+        ({"special_star", "star", "peak", "summit"}, "http://maps.google.com/mapfiles/kml/paddle/wht-stars.png"),
+        ({"town", "city", "village"}, "http://maps.google.com/mapfiles/kml/paddle/wht-blank.png"),
+    )
+    for names, href in choices:
+        if key in names:
+            return href
+    return "http://maps.google.com/mapfiles/kml/paddle/wht-blank.png"
+
+
+def html_description(record: dict[str, Any], max_props: int = 80) -> str:
+    """Render the HTML placed in a placemark's ``<description>``.
+
+    The output is a heading with the record name, a table of core fields
+    (group id/label, stable hash, coordinates, duplicate count) and a second
+    table with the entries of ``record["props"]`` sorted by key. Every value
+    goes through ``sanitize_text`` and ``html.escape`` before being embedded.
+
+    Args:
+        record: Resolved record; reads ``name``, ``group_id``, ``group_label``,
+            ``short_hash``, ``lat``, ``lon``, ``duplicate_count`` and ``props``.
+        max_props: Maximum number of property rows; when exceeded, one
+            trailing row reports how many properties were left out.
+
+    Returns:
+        The HTML fragment, one element per line.
+    """
+
+    def cell(value: object) -> str:
+        return html.escape(sanitize_text(value))
+
+    def coord(key: str) -> str:
+        # Coordinates may arrive as strings or be missing; a bad value must not
+        # abort the whole export, so fall back to showing the raw text.
+        raw = record.get(key, 0)
+        try:
+            return f"{float(raw):.8f}"
+        except (TypeError, ValueError):
+            return sanitize_text(raw if raw is not None else "")
+
+    props = record.get("props") or {}
+    rows = [f"<h3>{cell(record.get('name', ''))}</h3>", "<table>"]
+    basics = {
+        "group_id": record.get("group_id", ""),
+        "group_label": record.get("group_label", ""),
+        "stable_hash": record.get("short_hash", ""),
+        "lat": coord("lat"),
+        "lon": coord("lon"),
+        "duplicate_count": record.get("duplicate_count", ""),
+    }
+    for k, v in basics.items():
+        rows.append(f"<tr><th>{html.escape(str(k))}</th><td>{cell(v)}</td></tr>")
+    rows.append("</table>")
+    rows.append("<table>")
+    for idx, (k, v) in enumerate(sorted(props.items())):
+        if idx >= max_props:
+            rows.append(f"<tr><td colspan='2'>... {len(props) - max_props} more properties</td></tr>")
+            break
+        rows.append(f"<tr><th>{cell(k)}</th><td>{cell(v)}</td></tr>")
+    rows.append("</table>")
+    return "\n".join(rows)
+
+
+def write_kml(
+    records: list[dict[str, Any]],
+    path: Path,
+    *,
+    document_name: Optional[str] = None,
+    folder_visibility: int = 1,
+    folder_open: int = 0,
+    label_scale: float = 0.0,
+    icon_scale: float = 1.1,
+    include_extended_data: bool = True,
+) -> None:
+    """Write ``records`` to ``path`` as a KML document with one Folder per group.
+
+    Records are grouped by ``group_id`` (``"ungrouped"`` when empty). The first
+    record of each group supplies the group's label, color and icon. Groups
+    are ordered by case-folded label. The document contains one ``Style`` per
+    group followed by one ``Folder`` per group holding a ``Placemark`` for
+    every record.
+
+    Args:
+        records: Resolved records; each must provide ``lat`` and ``lon``
+            convertible with ``float()``.
+        path: Output file, written as UTF-8 with an XML declaration.
+        document_name: Document title; defaults to ``path.stem``.
+        folder_visibility: Truthy writes ``<visibility>1``, otherwise ``0``.
+        folder_open: Truthy writes ``<open>1`` on folders, otherwise ``0``.
+        label_scale: ``LabelStyle`` scale.
+        icon_scale: ``IconStyle`` scale.
+        include_extended_data: When true, every entry of ``props`` plus the
+            group id and stable hash are written as ``ExtendedData``.
+
+    Raises:
+        KeyError, TypeError, ValueError: If a record lacks usable coordinates.
+    """
+
+    def tag(name: str) -> str:
+        # Clark notation so ElementTree emits the element in the KML namespace.
+        return f"{{{KML_NS}}}{name}"
+
+    def add(parent, name, text=None, attrib=None):
+        el = ET.SubElement(parent, tag(name), attrib or {})
+        if text is not None:
+            el.text = text
+        return el
+
+    kml = ET.Element(tag("kml"))
+    doc = add(kml, "Document")
+    add(doc, "name", sanitize_text(document_name or path.stem))
+    add(doc, "open", "1")
+
+    grouped: dict[str, list[dict[str, Any]]] = defaultdict(list)
+    group_meta: dict[str, dict[str, Any]] = {}
+    for r in records:
+        gid = sanitize_text(r.get("group_id") or "ungrouped")
+        grouped[gid].append(r)
+        group_meta.setdefault(gid, r)
+
+    # One ordering shared by the Style pass and the Folder pass.
+    ordered_gids = sorted(
+        group_meta,
+        key=lambda g: sanitize_text(group_meta[g].get("group_label") or g).casefold(),
+    )
+
+    # Styles come first. Different group ids can sanitize to the same kml_id,
+    # so a numeric suffix keeps every Style id unique within the document.
+    style_ids: dict[str, str] = {}
+    taken: set[str] = set()
+    for gid in ordered_gids:
+        sample = group_meta[gid]
+        base = "style_" + kml_id(gid)
+        sid, n = base, 2
+        while sid in taken:
+            sid = f"{base}_{n}"
+            n += 1
+        taken.add(sid)
+        style_ids[gid] = sid
+
+        color = kml_color(sample.get("color"))
+        style = add(doc, "Style", attrib={"id": sid})
+        icon_style = add(style, "IconStyle")
+        add(icon_style, "color", color)
+        add(icon_style, "scale", f"{icon_scale:g}")
+        icon = add(icon_style, "Icon")
+        add(icon, "href", kml_icon_href(str(sample.get("icon") or "marker")))
+        label_style = add(style, "LabelStyle")
+        add(label_style, "color", color)
+        add(label_style, "scale", f"{label_scale:g}")
+
+    for gid in ordered_gids:
+        group_records = grouped[gid]
+        sample = group_meta[gid]
+        folder = add(doc, "Folder")
+        add(folder, "name", f"{sanitize_text(sample.get('group_label') or gid)} ({len(group_records)})")
+        add(folder, "visibility", "1" if folder_visibility else "0")
+        add(folder, "open", "1" if folder_open else "0")
+        style_url = "#" + style_ids[gid]
+        for r in group_records:
+            pm = add(folder, "Placemark")
+            # Child order follows the KML 2.2 schema:
+            # name, description, styleUrl, ExtendedData, geometry.
+            add(pm, "name", sanitize_text(r.get("name") or r.get("short_hash") or "Point"))
+            add(pm, "description", html_description(r))
+            add(pm, "styleUrl", style_url)
+            if include_extended_data:
+                ext = add(pm, "ExtendedData")
+                for k, v in sorted((r.get("props") or {}).items()):
+                    data = add(ext, "Data", attrib={"name": sanitize_text(k)})
+                    add(data, "value", sanitize_text(v))
+                data = add(ext, "Data", attrib={"name": "stage2:group_id"})
+                add(data, "value", sanitize_text(r.get("group_id")))
+                data = add(ext, "Data", attrib={"name": "stage2:stable_hash"})
+                add(data, "value", sanitize_text(r.get("short_hash")))
+            point = add(pm, "Point")
+            # KML coordinates are lon,lat,alt.
+            add(point, "coordinates", f"{float(r['lon']):.8f},{float(r['lat']):.8f},0")
+
+    tree = ET.ElementTree(kml)
+    # ET.indent only exists on Python 3.9+; pretty-printing is optional, but
+    # real errors from it should not be hidden.
+    indent = getattr(ET, "indent", None)
+    if indent is not None:
+        indent(tree, space=" ")
+    with path.open("wb") as f:
+        tree.write(f, encoding="utf-8", xml_declaration=True)
+
+
+def write_kmz(records: list[dict[str, Any]], path: Path, **kwargs) -> None:
+    """Write ``records`` as a KMZ archive containing a single ``doc.kml``.
+
+    The KML is produced by ``write_kml`` in a temporary directory and then
+    stored, deflated, as ``doc.kml`` inside the zip at ``path``.
+
+    Args:
+        records: Resolved records, forwarded to ``write_kml``.
+        path: Output ``.kmz`` file.
+        **kwargs: Keyword options of ``write_kml``. ``document_name`` defaults
+            to ``path.stem`` when missing or empty.
+    """
+    import tempfile
+
+    # The caller may pass document_name=None explicitly (the CLI does when no
+    # name was given). A plain pop() default would forward that None and
+    # write_kml would then name the document after the temp file ("doc").
+    document_name = kwargs.pop("document_name", None) or path.stem
+    with tempfile.TemporaryDirectory() as td:
+        tmp = Path(td) / "doc.kml"
+        write_kml(records, tmp, document_name=document_name, **kwargs)
+        with zipfile.ZipFile(path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
+            zf.write(tmp, "doc.kml")
+
+
+def write_kml_split_dir(records: list[dict[str, Any]], outdir: Path, *, kmz: bool = False, **kwargs) -> None:
+    """Write one KML (or KMZ) file per group into ``outdir`` plus ``manifest.json``.
+
+    Records are grouped by ``group_id`` (``"ungrouped"`` when empty); the first
+    record of a group supplies its label. Files are named from
+    ``safe_filename("<label>__<group_id>")`` and each document is titled with
+    the group label. ``manifest.json`` lists the group id, label, record count
+    and file name of every group.
+
+    Args:
+        records: Resolved records.
+        outdir: Target directory; created if missing.
+        kmz: Write ``.kmz`` archives instead of plain ``.kml`` files.
+        **kwargs: Options forwarded to the writer. Any ``document_name`` is
+            ignored because each file is named after its group.
+    """
+    outdir.mkdir(parents=True, exist_ok=True)
+    grouped: dict[str, list[dict[str, Any]]] = defaultdict(list)
+    labels: dict[str, str] = {}
+    for r in records:
+        gid = sanitize_text(r.get("group_id") or "ungrouped")
+        grouped[gid].append(r)
+        labels.setdefault(gid, sanitize_text(r.get("group_label") or gid))
+
+    base_kwargs = {k: v for k, v in kwargs.items() if k != "document_name"}
+    suffix = "kmz" if kmz else "kml"
+    writer = write_kmz if kmz else write_kml
+    manifest = []
+    used_stems: set[str] = set()
+    for gid in sorted(grouped, key=lambda g: labels[g].casefold()):
+        recs = grouped[gid]
+        label = labels[gid]
+        base = safe_filename(f"{label}__{gid}")
+        # Sanitizing and truncation can map two groups to the same stem. Pick
+        # a free one so a later group never overwrites an earlier file.
+        # Compared case-folded because some filesystems ignore case.
+        stem, n = base, 2
+        while stem.casefold() in used_stems:
+            stem = f"{base}_{n}"
+            n += 1
+        used_stems.add(stem.casefold())
+        path = outdir / f"{stem}.{suffix}"
+        writer(recs, path, document_name=label, **base_kwargs)
+        manifest.append({"group_id": gid, "group_label": label, "count": len(recs), "file": path.name})
+    save_json(outdir / "manifest.json", {"groups": manifest, "file_type": suffix})
+
def main(argv=None):
ap = argparse.ArgumentParser(description="Stage 2 packager: analyze landmark CSVs into configurable group definitions, then build OSM/GPX with stable hashed names and styles.")
sub = ap.add_subparsers(dest="cmd", required=True)
@@ -1013,6 +1187,16 @@ def main(argv=None):
b.add_argument("--resolved-csv", type=Path)
b.add_argument("--osm", type=Path)
b.add_argument("--gpx", type=Path)
+ b.add_argument("--kml", type=Path, help="Write Google Earth KML with one toggleable Folder per configured group")
+ b.add_argument("--kmz", type=Path, help="Write Google Earth KMZ with one toggleable Folder per configured group")
+ b.add_argument("--kml-split-dir", type=Path, help="Write one KML per group plus manifest.json for selective loading")
+ b.add_argument("--kmz-split-dir", type=Path, help="Write one KMZ per group plus manifest.json for selective loading")
+ b.add_argument("--kml-document-name", help="Document name shown in Google Earth")
+ b.add_argument("--kml-folder-visibility", type=int, choices=[0,1], default=1, help="Initial visibility for group folders")
+ b.add_argument("--kml-folder-open", type=int, choices=[0,1], default=0, help="Initial expanded/collapsed state for group folders")
+ b.add_argument("--kml-label-scale", type=float, default=0.0, help="Google Earth label scale; 0 hides always-on labels for performance")
+ b.add_argument("--kml-icon-scale", type=float, default=1.1)
+ b.add_argument("--kml-no-extended-data", action="store_true", help="Do not write per-point ExtendedData properties")
b.add_argument("--summary-json", type=Path)
b.add_argument("--unmatched-csv", type=Path, help="Write unmatched input landmarks for debugging config misses")
b.add_argument("--interactive-edit-config", action="store_true", help="Review/edit naming and postprocess tags before build")
@@ -1060,7 +1244,7 @@ def main(argv=None):
radius = args.dedupe_radius_m if args.dedupe_radius_m is not None else float(dedupe_cfg.get("radius_m", 12.0))
items = dedupe(items, radius_m=radius, mode=mode)
records, unmatched = build_records(items, cfg)
- if not any([args.resolved_csv, args.osm, args.gpx, args.summary_json, args.unmatched_csv]):
+ if not any([args.resolved_csv, args.osm, args.gpx, args.kml, args.kmz, args.kml_split_dir, args.kmz_split_dir, args.summary_json, args.unmatched_csv]):
print(f"[info] built {len(records)} records")
print(f"[info] unmatched {len(unmatched)} landmarks")
return 0
@@ -1073,6 +1257,26 @@ def main(argv=None):
if args.gpx:
write_gpx(records, args.gpx)
print(f"[info] wrote GPX: {args.gpx}")
+ kml_kwargs = {
+ "document_name": args.kml_document_name,
+ "folder_visibility": args.kml_folder_visibility,
+ "folder_open": args.kml_folder_open,
+ "label_scale": args.kml_label_scale,
+ "icon_scale": args.kml_icon_scale,
+ "include_extended_data": not args.kml_no_extended_data,
+ }
+ if args.kml:
+ write_kml(records, args.kml, **kml_kwargs)
+ print(f"[info] wrote Google Earth KML: {args.kml}")
+ if args.kmz:
+ write_kmz(records, args.kmz, **kml_kwargs)
+ print(f"[info] wrote Google Earth KMZ: {args.kmz}")
+ if args.kml_split_dir:
+ write_kml_split_dir(records, args.kml_split_dir, kmz=False, **kml_kwargs)
+ print(f"[info] wrote per-group Google Earth KML files: {args.kml_split_dir}")
+ if args.kmz_split_dir:
+ write_kml_split_dir(records, args.kmz_split_dir, kmz=True, **kml_kwargs)
+ print(f"[info] wrote per-group Google Earth KMZ files: {args.kmz_split_dir}")
if args.unmatched_csv:
write_unmatched_csv(unmatched, args.unmatched_csv)
print(f"[info] wrote unmatched CSV: {args.unmatched_csv}")
diff --git a/stage-2-parse-stage-1/readme.md b/stage-2-parse-stage-1/readme.md
index 5e5c44e..a76aac1 100644
--- a/stage-2-parse-stage-1/readme.md
+++ b/stage-2-parse-stage-1/readme.md
@@ -90,3 +90,92 @@ That gives you exactly the pattern you asked for:
* keep everything in the same config file
The next strong upgrade would be adding a dedicated `--run-cmd-before-eval-on-enter/--no-auto-open` toggle, so you can choose whether each group auto-opens the first example or only opens when you press `o`.
+
+Google Earth (KML/KMZ) export is implemented in stage 2.
+
+The upgraded script is in this directory:
+
+`landmarks_csv_to_osmand.py`
+
+I added Google Earth output on top of the existing stage-2 config workflow. Stage 1 already exports the needed `point_group_key`, raw Garmin type, semantic JSON, image metadata, and coordinates into CSV, so stage 2 can reuse that directly. The existing stage-2 logic already resolves records through the config, applying group assignment, naming, style, postprocess tags, and stable hashes, so I hooked Google Earth export after that same resolved-record stage.
+
+New outputs:
+
+```bash
+--kml output.kml
+--kmz output.kmz
+--kml-split-dir google-earth-kml-groups
+--kmz-split-dir google-earth-kmz-groups
+```
+
+Each KML/KMZ contains:
+
+* one Google Earth **Folder per configured group**
+* toggleable components in the Google Earth sidebar
+* per-group KML styles generated from your config color/icon
+* point name from your stage-2 naming logic
+* description table with Garmin/source/hash/config metadata
+* `ExtendedData` for machine-readable properties
+
+Example build:
+
+```bash
+python landmarks_csv_to_osmand.py build parsed-landmarks/csv/*.csv ^
+ --config stage2_config.json ^
+ --kml bgmountains_landmarks.kml ^
+ --kmz bgmountains_landmarks.kmz ^
+ --resolved-csv resolved.csv ^
+ --summary-json build_summary.json
+```
+
+For large datasets, use this instead:
+
+```bash
+python landmarks_csv_to_osmand.py build parsed-landmarks/csv/*.csv ^
+ --config stage2_config.json ^
+ --kmz-split-dir google-earth-groups ^
+ --kml-folder-visibility 0 ^
+ --kml-label-scale 0
+```
+
+That creates one `.kmz` per group plus:
+
+```text
+manifest.json
+```
+
+This is better for Google Earth when you have tens of thousands of points, because you can load only the group you are inspecting.
+
+Useful options added:
+
+```bash
+--kml-folder-visibility 0
+```
+
+Starts all folders hidden, so you enable groups manually in Google Earth.
+
+```bash
+--kml-folder-open 0
+```
+
+Keeps folders collapsed.
+
+```bash
+--kml-label-scale 0
+```
+
+Hides always-on labels. This is the default because 100k labels will destroy usability.
+
+```bash
+--kml-label-scale 0.7
+```
+
+Shows labels if you want them.
+
+```bash
+--kml-no-extended-data
+```
+
+Makes smaller KML/KMZ files by removing the full property dump.
+
+The KML export uses the same config-controlled naming/postprocessing path as your existing OSM/GPX generation. The previous stage-2 build command already supported OSM, GPX, resolved CSV, and unmatched CSV outputs; I added KML/KMZ into that same build output branch.