stage-2 revamp to output to google earth

This commit is contained in:
nq
2026-05-03 02:00:44 -07:00
parent b3587bee51
commit 5aad994de2
2 changed files with 294 additions and 1 deletions

View File

@@ -11,6 +11,8 @@ import math
import re
import sys
import subprocess
import zipfile
import html
import xml.etree.ElementTree as ET
from collections import Counter, defaultdict
from dataclasses import dataclass, field
@@ -19,7 +21,9 @@ from typing import Any, Iterable, Optional
# XML namespace URIs. KML_NS is the namespace every element written by
# write_kml() is qualified with; the other two are presumably used by the
# OSM/GPX writers elsewhere in this file (not visible here).
OSMAND_NS = "https://osmand.net"
GPX_NS = "http://www.topografix.com/GPX/1/1"
KML_NS = "http://www.opengis.net/kml/2.2"
# Serialize OsmAnd-namespaced names with the "osmand:" prefix.
ET.register_namespace("osmand", OSMAND_NS)
# Empty prefix: the intent is for KML elements to be serialized in the default
# namespace (plain <kml>, <Document>, ...) instead of an auto-generated "ns0:".
# NOTE(review): the ElementTree namespace registry is process-global, so this
# also applies to every other document this module serializes.
ET.register_namespace("", KML_NS)
# Presumably metres per degree of latitude (per the name) - usage is not
# visible in this part of the file.
EARTH_M_PER_DEG_LAT = 111_320.0
# Matches C0 control characters other than tab/LF/CR, UTF-16 surrogate code
# points, and U+FFFE/U+FFFF - characters that XML 1.0 does not allow.
_XML_INVALID_RE = re.compile(r"[\x00-\x08\x0B\x0C\x0E-\x1F\uD800-\uDFFF\uFFFE\uFFFF]")
@@ -986,6 +990,176 @@ def write_gpx(records: list[dict[str, Any]], path: Path):
with path.open("wb") as f:
tree.write(f, encoding="utf-8", xml_declaration=True)
def kml_color(value: object, alpha: str = "ff") -> str:
    """Translate an RGB(A) hex colour into KML's ``aabbggrr`` byte order.

    Accepted inputs, with or without a leading ``#``:

    * 8 hex digits, read as ``AARRGGBB`` (the input's own alpha is kept);
    * 6 hex digits, read as ``RRGGBB`` (``alpha`` supplies the opacity).

    Anything else, including an empty value, yields the orange default
    ``FB8C00`` combined with ``alpha``. Non-hex characters are dropped before
    the digits are counted. The result is always lower case.
    """
    text = sanitize_text(value).strip() or "#FB8C00"
    if text.startswith("#"):
        text = text[1:]
    digits = re.sub(r"[^0-9A-Fa-f]", "", text)

    if len(digits) == 8:
        opacity, rgb = digits[:2], digits[2:]
    elif len(digits) == 6:
        opacity, rgb = alpha, digits
    else:
        # Unparseable colour: fall back to the default orange.
        opacity, rgb = alpha, "FB8C00"

    red, green, blue = rgb[0:2], rgb[2:4], rgb[4:6]
    # KML stores the channels in reverse order: alpha, blue, green, red.
    return f"{opacity}{blue}{green}{red}".lower()
def kml_id(value: object) -> str:
    """Derive an identifier usable as a KML ``id`` attribute from ``value``.

    Runs of characters outside ``[A-Za-z0-9_.-]`` collapse to a single ``_``.
    If the result does not begin with a letter or underscore it is prefixed
    with ``x_``. Blank input becomes ``id``. The result is capped at 120
    characters.
    """
    raw = sanitize_text(value).strip() or "id"
    ident = re.sub(r"[^A-Za-z0-9_\-.]+", "_", raw)
    starts_ok = re.match(r"[A-Za-z_]", ident) is not None
    prefix = "" if starts_ok else "x_"
    return (prefix + ident)[:120]
def safe_filename(value: object, default: str = "group") -> str:
    """Reduce ``value`` to a conservative file-name stem.

    Only ASCII letters, digits, ``_``, ``-``, ``.`` and the Cyrillic block
    (U+0400-U+04FF) are kept; every other run of characters becomes one
    ``_``. Leading and trailing dots, underscores and spaces are trimmed.
    ``default`` is used whenever the text is empty before or after cleaning.
    The result is capped at 120 characters.
    """
    text = sanitize_text(value).strip()
    if not text:
        text = default
    cleaned = re.sub(r"[^A-Za-z0-9_\-.\u0400-\u04FF]+", "_", text).strip("._ ")
    if not cleaned:
        cleaned = default
    return cleaned[:120]
def kml_icon_href(icon: str) -> str:
    """Return the Google-hosted paddle icon URL for a configured icon name.

    Matching is case-insensitive and ignores surrounding whitespace. Names
    that are not recognised get the blank paddle. All icons are white
    variants because the colour is controlled by the IconStyle colour.
    """
    # (icon names, href) pairs, checked in order; first match wins.
    icon_table = (
        (
            {"water_drop", "water", "drinking_water"},
            "http://maps.google.com/mapfiles/kml/paddle/wht-circle.png",
        ),
        (
            {"special_star", "star", "peak", "summit"},
            "http://maps.google.com/mapfiles/kml/paddle/wht-stars.png",
        ),
        (
            {"town", "city", "village"},
            "http://maps.google.com/mapfiles/kml/paddle/wht-blank.png",
        ),
    )
    key = sanitize_text(icon).strip().lower()
    for names, href in icon_table:
        if key in names:
            return href
    return "http://maps.google.com/mapfiles/kml/paddle/wht-blank.png"
def html_description(record: dict[str, Any], max_props: int = 80) -> str:
    """Build the HTML snippet used as a placemark description.

    The snippet is an ``<h3>`` heading with the record name, a table of basic
    fields (group id/label, stable hash, coordinates, duplicate count) and a
    second table listing ``record["props"]`` sorted by key.

    Args:
        record: Resolved record. Reads ``name``, ``group_id``, ``group_label``,
            ``short_hash``, ``lat``, ``lon``, ``duplicate_count`` and
            ``props``; all of them are optional.
        max_props: Maximum number of property rows. When more properties
            exist, one trailing row reports how many were omitted.

    Returns:
        The HTML fragments joined with newlines. All dynamic text is
        HTML-escaped.
    """

    def coord(raw: object) -> object:
        # Coordinates may arrive as strings (write_kml coerces them with
        # float() too). Applying ":.8f" directly to a str or None raises, so
        # coerce first and fall back to showing the raw value rather than
        # failing the whole export over a description cell.
        try:
            return f"{float(raw):.8f}"  # type: ignore[arg-type]
        except (TypeError, ValueError):
            return raw

    props = record.get("props") or {}
    rows = [
        f"<h3>{html.escape(sanitize_text(record.get('name', '')))}</h3>",
        "<table border='1' cellpadding='3' cellspacing='0'>",
    ]
    basics = {
        "group_id": record.get("group_id", ""),
        "group_label": record.get("group_label", ""),
        "stable_hash": record.get("short_hash", ""),
        "lat": coord(record.get("lat", 0)),
        "lon": coord(record.get("lon", 0)),
        "duplicate_count": record.get("duplicate_count", ""),
    }
    for k, v in basics.items():
        rows.append(f"<tr><th>{html.escape(str(k))}</th><td>{html.escape(sanitize_text(v))}</td></tr>")
    rows.append("</table>")
    rows.append("<br/><table border='1' cellpadding='3' cellspacing='0'>")
    for idx, (k, v) in enumerate(sorted(props.items())):
        if idx >= max_props:
            # Summarise the remainder instead of emitting an unbounded table.
            rows.append(f"<tr><td colspan='2'>... {len(props)-max_props} more properties</td></tr>")
            break
        rows.append(f"<tr><th>{html.escape(sanitize_text(k))}</th><td>{html.escape(sanitize_text(v))}</td></tr>")
    rows.append("</table>")
    return "\n".join(rows)
def write_kml(
    records: list[dict[str, Any]],
    path: Path,
    *,
    document_name: Optional[str] = None,
    folder_visibility: int = 1,
    folder_open: int = 0,
    label_scale: float = 0.0,
    icon_scale: float = 1.1,
    include_extended_data: bool = True,
) -> None:
    """Write ``records`` to ``path`` as a KML document, one Folder per group.

    Records are grouped by ``group_id`` (``"ungrouped"`` when missing). Each
    group gets one shared ``<Style>`` built from the ``color`` and ``icon`` of
    the first record seen for that group, and one ``<Folder>`` holding a
    ``<Placemark>`` per record. Groups are ordered case-insensitively by
    ``group_label`` (falling back to the group id).

    Args:
        records: Resolved records. Reads ``group_id``, ``group_label``,
            ``color``, ``icon``, ``name``, ``short_hash``, ``props``, ``lon``
            and ``lat``.
        path: Output file. Its stem is the document name when
            ``document_name`` is empty.
        document_name: Text for the Document ``<name>``.
        folder_visibility: Truthy writes ``<visibility>1``, falsy ``0``.
        folder_open: Truthy writes ``<open>1`` on each folder, falsy ``0``.
        label_scale: LabelStyle scale.
        icon_scale: IconStyle scale.
        include_extended_data: Whether to emit ``<ExtendedData>`` with every
            property plus ``stage2:group_id`` and ``stage2:stable_hash``.

    Raises:
        KeyError: If a record has no ``lon`` or ``lat``.
        ValueError: If ``lon`` or ``lat`` cannot be converted to ``float``.
    """

    def tag(local: str) -> str:
        # Clark notation ("{namespace}local") as ElementTree expects.
        return f"{{{KML_NS}}}{local}"

    def add_text(parent: ET.Element, local: str, text: str) -> ET.Element:
        child = ET.SubElement(parent, tag(local))
        child.text = text
        return child

    kml = ET.Element(tag("kml"))
    doc = ET.SubElement(kml, tag("Document"))
    add_text(doc, "name", sanitize_text(document_name or path.stem))
    add_text(doc, "open", "1")

    grouped: dict[str, list[dict[str, Any]]] = defaultdict(list)
    group_meta: dict[str, dict[str, Any]] = {}
    for r in records:
        gid = sanitize_text(r.get("group_id") or "ungrouped")
        grouped[gid].append(r)
        # The first record of a group supplies its label, colour and icon.
        group_meta.setdefault(gid, r)

    # Styles and folders share one ordering, so compute it once.
    ordered_gids = sorted(
        group_meta,
        key=lambda g: sanitize_text(group_meta[g].get("group_label") or g).casefold(),
    )

    # kml_id() is lossy (it substitutes characters and truncates), so distinct
    # group ids can map to the same identifier. Duplicate Style ids would make
    # styleUrl references ambiguous; suffix collisions to keep ids unique.
    style_ids: dict[str, str] = {}
    used_ids: set[str] = set()
    for gid in ordered_gids:
        base = "style_" + kml_id(gid)
        sid = base
        n = 2
        while sid in used_ids:
            sid = f"{base}_{n}"
            n += 1
        used_ids.add(sid)
        style_ids[gid] = sid

    for gid in ordered_gids:
        sample = group_meta[gid]
        color = kml_color(sample.get("color"))
        style = ET.SubElement(doc, tag("Style"), {"id": style_ids[gid]})
        icon_style = ET.SubElement(style, tag("IconStyle"))
        add_text(icon_style, "color", color)
        add_text(icon_style, "scale", f"{icon_scale:g}")
        icon = ET.SubElement(icon_style, tag("Icon"))
        add_text(icon, "href", kml_icon_href(str(sample.get("icon") or "marker")))
        label_style = ET.SubElement(style, tag("LabelStyle"))
        add_text(label_style, "color", color)
        add_text(label_style, "scale", f"{label_scale:g}")

    for gid in ordered_gids:
        sample = group_meta[gid]
        group_records = grouped[gid]
        style_url = "#" + style_ids[gid]
        folder = ET.SubElement(doc, tag("Folder"))
        add_text(folder, "name", f"{sanitize_text(sample.get('group_label') or gid)} ({len(group_records)})")
        add_text(folder, "visibility", "1" if folder_visibility else "0")
        add_text(folder, "open", "1" if folder_open else "0")
        for r in group_records:
            pm = ET.SubElement(folder, tag("Placemark"))
            add_text(pm, "name", sanitize_text(r.get("name") or r.get("short_hash") or "Point"))
            add_text(pm, "styleUrl", style_url)
            add_text(pm, "description", html_description(r))
            if include_extended_data:
                ext = ET.SubElement(pm, tag("ExtendedData"))
                for k, v in sorted((r.get("props") or {}).items()):
                    data = ET.SubElement(ext, tag("Data"), {"name": sanitize_text(k)})
                    add_text(data, "value", sanitize_text(v))
                data = ET.SubElement(ext, tag("Data"), {"name": "stage2:group_id"})
                add_text(data, "value", sanitize_text(r.get("group_id")))
                data = ET.SubElement(ext, tag("Data"), {"name": "stage2:stable_hash"})
                add_text(data, "value", sanitize_text(r.get("short_hash")))
            point = ET.SubElement(pm, tag("Point"))
            # KML coordinate order is lon,lat,alt.
            add_text(point, "coordinates", f"{float(r['lon']):.8f},{float(r['lat']):.8f},0")

    tree = ET.ElementTree(kml)
    # Pretty-printing is cosmetic. ET.indent only exists on Python 3.9+, so
    # skip it when unavailable instead of swallowing every exception.
    if hasattr(ET, "indent"):
        ET.indent(tree, space=" ")
    with path.open("wb") as f:
        tree.write(f, encoding="utf-8", xml_declaration=True)
def write_kmz(records: list[dict[str, Any]], path: Path, **kwargs) -> None:
    """Write ``records`` as a KMZ archive: a zip containing one ``doc.kml``.

    Args:
        records: Resolved records, passed to ``write_kml`` unchanged.
        path: Destination ``.kmz`` file.
        **kwargs: Keyword options forwarded to ``write_kml``. When
            ``document_name`` is missing, ``None`` or empty, the stem of
            ``path`` is used.
    """
    import tempfile

    # Callers may pass document_name=None explicitly (the CLI does when
    # --kml-document-name is omitted). A plain pop(..., default) would hand
    # that None on, and write_kml would then fall back to the temporary
    # file's stem ("doc") instead of the KMZ's own name.
    document_name = kwargs.pop("document_name", None) or path.stem
    with tempfile.TemporaryDirectory() as td:
        tmp = Path(td) / "doc.kml"
        write_kml(records, tmp, document_name=document_name, **kwargs)
        with zipfile.ZipFile(path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
            zf.write(tmp, "doc.kml")
def write_kml_split_dir(records: list[dict[str, Any]], outdir: Path, *, kmz: bool = False, **kwargs) -> None:
    """Write one KML (or KMZ) file per group into ``outdir`` plus a manifest.

    Records are grouped by ``group_id`` (``"ungrouped"`` when missing). Each
    group is written to ``<label>__<group_id>.<ext>`` after file-name
    sanitizing, using the group label as the document name. A
    ``manifest.json`` lists every group with its label, record count and
    file name.

    Args:
        records: Resolved records. Reads ``group_id`` and ``group_label``.
        outdir: Output directory, created (with parents) if needed.
        kmz: Write ``.kmz`` archives instead of plain ``.kml`` files.
        **kwargs: Keyword options forwarded to the writer. Any
            ``document_name`` is ignored because each file is named after
            its group.
    """
    outdir.mkdir(parents=True, exist_ok=True)
    grouped: dict[str, list[dict[str, Any]]] = defaultdict(list)
    labels: dict[str, str] = {}
    for r in records:
        gid = sanitize_text(r.get("group_id") or "ungrouped")
        grouped[gid].append(r)
        labels.setdefault(gid, sanitize_text(r.get("group_label") or gid))

    base_kwargs = dict(kwargs)
    base_kwargs.pop("document_name", None)
    ext = "kmz" if kmz else "kml"
    writer = write_kmz if kmz else write_kml

    manifest = []
    # safe_filename() substitutes characters and truncates to 120 chars, so
    # different groups can yield the same stem. Track stems (case-folded, for
    # case-insensitive filesystems) and suffix collisions so one group's file
    # never overwrites another's.
    used_stems: set[str] = set()
    for gid, recs in sorted(grouped.items(), key=lambda kv: labels[kv[0]].casefold()):
        label = labels[gid]
        base_stem = safe_filename(f"{label}__{gid}")
        stem = base_stem
        n = 2
        while stem.casefold() in used_stems:
            stem = f"{base_stem}_{n}"
            n += 1
        used_stems.add(stem.casefold())
        path = outdir / f"{stem}.{ext}"
        writer(recs, path, document_name=label, **base_kwargs)
        manifest.append({"group_id": gid, "group_label": label, "count": len(recs), "file": path.name})
    save_json(outdir / "manifest.json", {"groups": manifest, "file_type": ext})
def main(argv=None):
ap = argparse.ArgumentParser(description="Stage 2 packager: analyze landmark CSVs into configurable group definitions, then build OSM/GPX with stable hashed names and styles.")
sub = ap.add_subparsers(dest="cmd", required=True)
@@ -1013,6 +1187,16 @@ def main(argv=None):
b.add_argument("--resolved-csv", type=Path)
b.add_argument("--osm", type=Path)
b.add_argument("--gpx", type=Path)
b.add_argument("--kml", type=Path, help="Write Google Earth KML with one toggleable Folder per configured group")
b.add_argument("--kmz", type=Path, help="Write Google Earth KMZ with one toggleable Folder per configured group")
b.add_argument("--kml-split-dir", type=Path, help="Write one KML per group plus manifest.json for selective loading")
b.add_argument("--kmz-split-dir", type=Path, help="Write one KMZ per group plus manifest.json for selective loading")
b.add_argument("--kml-document-name", help="Document name shown in Google Earth")
b.add_argument("--kml-folder-visibility", type=int, choices=[0,1], default=1, help="Initial visibility for group folders")
b.add_argument("--kml-folder-open", type=int, choices=[0,1], default=0, help="Initial expanded/collapsed state for group folders")
b.add_argument("--kml-label-scale", type=float, default=0.0, help="Google Earth label scale; 0 hides always-on labels for performance")
b.add_argument("--kml-icon-scale", type=float, default=1.1)
b.add_argument("--kml-no-extended-data", action="store_true", help="Do not write per-point ExtendedData properties")
b.add_argument("--summary-json", type=Path)
b.add_argument("--unmatched-csv", type=Path, help="Write unmatched input landmarks for debugging config misses")
b.add_argument("--interactive-edit-config", action="store_true", help="Review/edit naming and postprocess tags before build")
@@ -1060,7 +1244,7 @@ def main(argv=None):
radius = args.dedupe_radius_m if args.dedupe_radius_m is not None else float(dedupe_cfg.get("radius_m", 12.0))
items = dedupe(items, radius_m=radius, mode=mode)
records, unmatched = build_records(items, cfg)
if not any([args.resolved_csv, args.osm, args.gpx, args.summary_json, args.unmatched_csv]):
if not any([args.resolved_csv, args.osm, args.gpx, args.kml, args.kmz, args.kml_split_dir, args.kmz_split_dir, args.summary_json, args.unmatched_csv]):
print(f"[info] built {len(records)} records")
print(f"[info] unmatched {len(unmatched)} landmarks")
return 0
@@ -1073,6 +1257,26 @@ def main(argv=None):
if args.gpx:
write_gpx(records, args.gpx)
print(f"[info] wrote GPX: {args.gpx}")
kml_kwargs = {
"document_name": args.kml_document_name,
"folder_visibility": args.kml_folder_visibility,
"folder_open": args.kml_folder_open,
"label_scale": args.kml_label_scale,
"icon_scale": args.kml_icon_scale,
"include_extended_data": not args.kml_no_extended_data,
}
if args.kml:
write_kml(records, args.kml, **kml_kwargs)
print(f"[info] wrote Google Earth KML: {args.kml}")
if args.kmz:
write_kmz(records, args.kmz, **kml_kwargs)
print(f"[info] wrote Google Earth KMZ: {args.kmz}")
if args.kml_split_dir:
write_kml_split_dir(records, args.kml_split_dir, kmz=False, **kml_kwargs)
print(f"[info] wrote per-group Google Earth KML files: {args.kml_split_dir}")
if args.kmz_split_dir:
write_kml_split_dir(records, args.kmz_split_dir, kmz=True, **kml_kwargs)
print(f"[info] wrote per-group Google Earth KMZ files: {args.kmz_split_dir}")
if args.unmatched_csv:
write_unmatched_csv(unmatched, args.unmatched_csv)
print(f"[info] wrote unmatched CSV: {args.unmatched_csv}")

View File

@@ -90,3 +90,92 @@ That gives you exactly the pattern you asked for:
* keep everything in the same config file
The next strong upgrade would be adding a dedicated `--run-cmd-before-eval-on-enter/--no-auto-open` toggle, so you can choose whether each group auto-opens the first example or only opens when you press `o`.
Implemented it in stage 2.
Download the upgraded script here:
[landmarks_csv_to_osmand_google_earth.py](sandbox:/mnt/data/landmarks_csv_to_osmand_google_earth.py)
I added Google Earth output on top of the existing stage-2 config workflow. Stage 1 already exports the needed `point_group_key`, raw Garmin type, semantic JSON, image metadata, and coordinates into CSV, so stage 2 can reuse that directly. The existing stage-2 logic already resolves records through the config, applying group assignment, naming, style, postprocess tags, and stable hashes, so I hooked Google Earth export after that same resolved-record stage.
New outputs:
```bash
--kml output.kml
--kmz output.kmz
--kml-split-dir google-earth-kml-groups
--kmz-split-dir google-earth-kmz-groups
```
Each KML/KMZ contains:
* one Google Earth **Folder per configured group**
* toggleable components in the Google Earth sidebar
* per-group KML styles generated from your config color/icon
* point name from your stage-2 naming logic
* description table with Garmin/source/hash/config metadata
* `ExtendedData` for machine-readable properties
Example build:
```bash
python landmarks_csv_to_osmand_google_earth.py build parsed-landmarks/csv/*.csv \
--config stage2_config.json \
--kml bgmountains_landmarks.kml \
--kmz bgmountains_landmarks.kmz \
--resolved-csv resolved.csv \
--summary-json build_summary.json
```
For large datasets, use this instead:
```bash
python landmarks_csv_to_osmand_google_earth.py build parsed-landmarks/csv/*.csv \
--config stage2_config.json \
--kmz-split-dir google-earth-groups \
--kml-folder-visibility 0 \
--kml-label-scale 0
```
That creates one `.kmz` per group plus:
```text
manifest.json
```
This is better for Google Earth when you have tens of thousands of points, because you can load only the group you are inspecting.
Useful options added:
```bash
--kml-folder-visibility 0
```
Starts all folders hidden, so you enable groups manually in Google Earth.
```bash
--kml-folder-open 0
```
Keeps folders collapsed (this is the default).
```bash
--kml-label-scale 0
```
Hides always-on labels. This is the default because 100k labels will destroy usability.
```bash
--kml-label-scale 0.7
```
Shows labels if you want them.
```bash
--kml-no-extended-data
```
Makes smaller KML/KMZ files by removing the full property dump.
The KML export uses the same config-controlled naming/postprocessing path as your existing OSM/GPX generation. The previous stage-2 build command already supported OSM, GPX, resolved CSV, and unmatched CSV outputs; I added KML/KMZ into that same build output branch.