stage-2 revamp to output to google earth

This commit is contained in:
nq
2026-05-03 02:00:44 -07:00
parent b3587bee51
commit 5aad994de2
2 changed files with 294 additions and 1 deletions

View File

@@ -11,6 +11,8 @@ import math
import re import re
import sys import sys
import subprocess import subprocess
import zipfile
import html
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
from collections import Counter, defaultdict from collections import Counter, defaultdict
from dataclasses import dataclass, field from dataclasses import dataclass, field
@@ -19,7 +21,9 @@ from typing import Any, Iterable, Optional
OSMAND_NS = "https://osmand.net" OSMAND_NS = "https://osmand.net"
GPX_NS = "http://www.topografix.com/GPX/1/1" GPX_NS = "http://www.topografix.com/GPX/1/1"
KML_NS = "http://www.opengis.net/kml/2.2"
ET.register_namespace("osmand", OSMAND_NS) ET.register_namespace("osmand", OSMAND_NS)
ET.register_namespace("", KML_NS)
EARTH_M_PER_DEG_LAT = 111_320.0 EARTH_M_PER_DEG_LAT = 111_320.0
_XML_INVALID_RE = re.compile(r"[\x00-\x08\x0B\x0C\x0E-\x1F\uD800-\uDFFF\uFFFE\uFFFF]") _XML_INVALID_RE = re.compile(r"[\x00-\x08\x0B\x0C\x0E-\x1F\uD800-\uDFFF\uFFFE\uFFFF]")
@@ -986,6 +990,176 @@ def write_gpx(records: list[dict[str, Any]], path: Path):
with path.open("wb") as f: with path.open("wb") as f:
tree.write(f, encoding="utf-8", xml_declaration=True) tree.write(f, encoding="utf-8", xml_declaration=True)
def kml_color(value: object, alpha: str = "ff") -> str:
    """Translate an RGB-style colour into KML's ``aabbggrr`` byte order.

    Accepted shapes (with or without a leading ``#``) are ``RRGGBB`` and
    ``AARRGGBB``. Every non-hex character is discarded before the length is
    checked. Anything that does not end up as 6 or 8 hex digits, including
    an empty value, yields the fallback colour ``FB8C00``.

    Args:
        value: Colour specification; it is passed through ``sanitize_text``.
        alpha: Two-digit alpha used whenever the input carries none.

    Returns:
        Lower-case ``aabbggrr`` string.
    """
    text = sanitize_text(value).strip() or "#FB8C00"
    body = text[1:] if text.startswith("#") else text
    digits = re.sub(r"[^0-9A-Fa-f]", "", body)

    if len(digits) == 8:
        # Eight digits are read as AARRGGBB: the input supplies its own alpha.
        aa, rgb = digits[0:2], digits[2:8]
    elif len(digits) == 6:
        aa, rgb = alpha, digits
    else:
        aa, rgb = alpha, "FB8C00"

    rr, gg, bb = rgb[0:2], rgb[2:4], rgb[4:6]
    # KML stores channels in reverse order compared to web colours.
    return f"{aa}{bb}{gg}{rr}".lower()
def kml_id(value: object) -> str:
    """Build an identifier usable as a KML element ``id`` from *value*.

    Runs of characters outside ``A-Za-z0-9_-.`` collapse into a single
    underscore, an ``x_`` prefix is added when the result does not start
    with a letter or underscore, and the result is capped at 120 characters.
    An empty input becomes ``"id"``.
    """
    raw = sanitize_text(value).strip() or "id"
    cleaned = re.sub(r"[^A-Za-z0-9_\-.]+", "_", raw)
    starts_ok = re.match(r"[A-Za-z_]", cleaned) is not None
    prefix = "" if starts_ok else "x_"
    return (prefix + cleaned)[:120]
def safe_filename(value: object, default: str = "group") -> str:
    """Reduce *value* to a conservative file-name stem.

    ASCII letters, digits, ``_``, ``-``, ``.`` and characters in the range
    U+0400-U+04FF are kept; every other run becomes one underscore. Leading
    and trailing dots, underscores and spaces are trimmed, *default* is used
    when nothing is left, and the stem is limited to 120 characters.
    """
    text = sanitize_text(value).strip()
    if not text:
        text = default
    text = re.sub(r"[^A-Za-z0-9_\-.\u0400-\u04FF]+", "_", text)
    trimmed = text.strip("._ ")
    if not trimmed:
        trimmed = default
    return trimmed[:120]
def kml_icon_href(icon: str) -> str:
    """Return the remote paddle-icon URL used for a configured icon name.

    The icons are white so that the tint comes from the ``IconStyle`` colour
    rather than from the image itself. Matching is case-insensitive; names
    without a dedicated entry get the blank paddle.
    """
    blank = "http://maps.google.com/mapfiles/kml/paddle/wht-blank.png"
    families = (
        (
            ("water_drop", "water", "drinking_water"),
            "http://maps.google.com/mapfiles/kml/paddle/wht-circle.png",
        ),
        (
            ("special_star", "star", "peak", "summit"),
            "http://maps.google.com/mapfiles/kml/paddle/wht-stars.png",
        ),
        (
            ("town", "city", "village"),
            blank,
        ),
    )
    key = sanitize_text(icon).strip().lower()
    for names, href in families:
        if key in names:
            return href
    return blank
def html_description(record: dict[str, Any], max_props: int = 80) -> str:
    """Render *record* as an HTML fragment for a KML ``<description>``.

    The fragment consists of an ``<h3>`` with the record name, a table of
    basic fields (group, hash, coordinates, duplicate count) and a second
    table with the record's ``props`` sorted by key. All text is run through
    ``sanitize_text`` and ``html.escape`` before being embedded.

    Args:
        record: Resolved record; the keys read are ``name``, ``group_id``,
            ``group_label``, ``short_hash``, ``lat``, ``lon``,
            ``duplicate_count`` and ``props``.
        max_props: Maximum number of property rows. When more exist, a final
            row states how many were left out. Values below zero are treated
            as zero.

    Returns:
        The HTML rows joined with newlines.
    """

    def cell(value: object) -> str:
        return html.escape(sanitize_text(value))

    def coord(key: str) -> str:
        # Coerce like write_kml does, so coordinates that arrive as strings
        # format the same way; fall back to the raw text instead of raising
        # on values that are not numeric at all.
        raw = record.get(key, 0)
        try:
            return f"{float(raw):.8f}"
        except (TypeError, ValueError):
            return sanitize_text(raw)

    props = record.get("props") or {}
    basics = {
        "group_id": record.get("group_id", ""),
        "group_label": record.get("group_label", ""),
        "stable_hash": record.get("short_hash", ""),
        "lat": coord("lat"),
        "lon": coord("lon"),
        "duplicate_count": record.get("duplicate_count", ""),
    }

    rows = [
        f"<h3>{html.escape(sanitize_text(record.get('name', '')))}</h3>",
        "<table border='1' cellpadding='3' cellspacing='0'>",
    ]
    rows.extend(
        f"<tr><th>{html.escape(str(k))}</th><td>{cell(v)}</td></tr>"
        for k, v in basics.items()
    )
    rows.append("</table>")

    rows.append("<br/><table border='1' cellpadding='3' cellspacing='0'>")
    items = sorted(props.items())
    shown = items[: max(max_props, 0)]
    rows.extend(f"<tr><th>{cell(k)}</th><td>{cell(v)}</td></tr>" for k, v in shown)
    hidden = len(items) - len(shown)
    if hidden > 0:
        rows.append(f"<tr><td colspan='2'>... {hidden} more properties</td></tr>")
    rows.append("</table>")
    return "\n".join(rows)
def write_kml(
    records: list[dict[str, Any]],
    path: Path,
    *,
    document_name: Optional[str] = None,
    folder_visibility: int = 1,
    folder_open: int = 0,
    label_scale: float = 0.0,
    icon_scale: float = 1.1,
    include_extended_data: bool = True,
) -> None:
    """Write *records* as a KML document with one Folder per group.

    Records are grouped by ``group_id`` (``"ungrouped"`` when missing). The
    first record seen for a group supplies that group's label, colour and
    icon. One shared ``<Style>`` is emitted per group, followed by one
    ``<Folder>`` per group; both are ordered by case-folded group label.

    Placemark children are written in the order the KML 2.2 schema defines
    for a Feature (name, description, styleUrl, ExtendedData, geometry).

    Args:
        records: Resolved records. ``lat`` and ``lon`` are required and must
            be convertible with ``float()``.
        path: Destination ``.kml`` file.
        document_name: Name of the ``<Document>``; defaults to ``path.stem``.
        folder_visibility: Truthy writes ``<visibility>1``, falsy ``0``.
        folder_open: Truthy writes ``<open>1`` on each folder, falsy ``0``.
        label_scale: ``LabelStyle`` scale (formatted with ``:g``).
        icon_scale: ``IconStyle`` scale (formatted with ``:g``).
        include_extended_data: When true, every property plus
            ``stage2:group_id`` and ``stage2:stable_hash`` is written as
            ``<ExtendedData>``.

    Raises:
        KeyError: A record lacks ``lat`` or ``lon``.
        ValueError, TypeError: A coordinate cannot be converted to float.
    """

    def q(tag: str) -> str:
        # Fully qualified tag name in the KML namespace.
        return f"{{{KML_NS}}}{tag}"

    def add_text(parent: ET.Element, tag: str, text: str) -> ET.Element:
        child = ET.SubElement(parent, q(tag))
        child.text = text
        return child

    kml = ET.Element(q("kml"))
    doc = ET.SubElement(kml, q("Document"))
    add_text(doc, "name", sanitize_text(document_name or path.stem))
    add_text(doc, "open", "1")

    grouped: dict[str, list[dict[str, Any]]] = defaultdict(list)
    group_meta: dict[str, dict[str, Any]] = {}
    for r in records:
        gid = sanitize_text(r.get("group_id") or "ungrouped")
        grouped[gid].append(r)
        group_meta.setdefault(gid, r)

    def label_of(gid: str) -> str:
        return sanitize_text(group_meta[gid].get("group_label") or gid)

    # One ordering shared by the style section and the folder section.
    ordered_gids = sorted(group_meta, key=lambda g: label_of(g).casefold())

    # kml_id() is lossy (it replaces characters and truncates), so distinct
    # group ids can map to the same id. Disambiguate so that every group
    # keeps its own style instead of silently sharing one.
    style_ids: dict[str, str] = {}
    taken: set[str] = set()
    for gid in ordered_gids:
        base = "style_" + kml_id(gid)
        sid = base
        suffix = 2
        while sid in taken:
            sid = f"{base}_{suffix}"
            suffix += 1
        taken.add(sid)
        style_ids[gid] = sid

    for gid in ordered_gids:
        sample = group_meta[gid]
        color = kml_color(sample.get("color"))
        style = ET.SubElement(doc, q("Style"), {"id": style_ids[gid]})
        icon_style = ET.SubElement(style, q("IconStyle"))
        add_text(icon_style, "color", color)
        add_text(icon_style, "scale", f"{icon_scale:g}")
        icon = ET.SubElement(icon_style, q("Icon"))
        add_text(icon, "href", kml_icon_href(str(sample.get("icon") or "marker")))
        label_style = ET.SubElement(style, q("LabelStyle"))
        add_text(label_style, "color", color)
        add_text(label_style, "scale", f"{label_scale:g}")

    visibility_text = "1" if folder_visibility else "0"
    open_text = "1" if folder_open else "0"
    for gid in ordered_gids:
        group_records = grouped[gid]
        style_url = "#" + style_ids[gid]
        folder = ET.SubElement(doc, q("Folder"))
        add_text(folder, "name", f"{label_of(gid)} ({len(group_records)})")
        add_text(folder, "visibility", visibility_text)
        add_text(folder, "open", open_text)
        for r in group_records:
            pm = ET.SubElement(folder, q("Placemark"))
            add_text(pm, "name", sanitize_text(r.get("name") or r.get("short_hash") or "Point"))
            # Schema order for a Feature: description comes before styleUrl.
            add_text(pm, "description", html_description(r))
            add_text(pm, "styleUrl", style_url)
            if include_extended_data:
                ext = ET.SubElement(pm, q("ExtendedData"))
                for k, v in sorted((r.get("props") or {}).items()):
                    data = ET.SubElement(ext, q("Data"), {"name": sanitize_text(k)})
                    add_text(data, "value", sanitize_text(v))
                data = ET.SubElement(ext, q("Data"), {"name": "stage2:group_id"})
                add_text(data, "value", sanitize_text(r.get("group_id")))
                data = ET.SubElement(ext, q("Data"), {"name": "stage2:stable_hash"})
                add_text(data, "value", sanitize_text(r.get("short_hash")))
            point = ET.SubElement(pm, q("Point"))
            # KML coordinate order is lon,lat,alt.
            add_text(point, "coordinates", f"{float(r['lon']):.8f},{float(r['lat']):.8f},0")

    tree = ET.ElementTree(kml)
    # ET.indent only exists on Python 3.9+; pretty-printing is optional, so
    # skip it when unavailable rather than hiding unrelated errors.
    indent = getattr(ET, "indent", None)
    if indent is not None:
        indent(tree, space=" ")
    with path.open("wb") as f:
        tree.write(f, encoding="utf-8", xml_declaration=True)
def write_kmz(records: list[dict[str, Any]], path: Path, **kwargs) -> None:
    """Write *records* as a KMZ archive containing a single ``doc.kml``.

    The KML is produced by ``write_kml`` in a temporary directory and then
    stored deflated in the archive at *path*.

    Args:
        records: Resolved records, forwarded to ``write_kml``.
        path: Destination ``.kmz`` file.
        **kwargs: Keyword options forwarded to ``write_kml``. A missing or
            empty ``document_name`` falls back to ``path.stem``.
    """
    import tempfile

    # An explicit document_name=None (which callers pass when the option was
    # not given) must also fall back to the archive's own stem. Otherwise
    # write_kml would derive the name from the temp file and every KMZ would
    # show up as "doc".
    document_name = kwargs.pop("document_name", None) or path.stem
    with tempfile.TemporaryDirectory() as td:
        tmp = Path(td) / "doc.kml"
        write_kml(records, tmp, document_name=document_name, **kwargs)
        # Zip while the temporary directory still exists.
        with zipfile.ZipFile(path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
            zf.write(tmp, "doc.kml")
def write_kml_split_dir(records: list[dict[str, Any]], outdir: Path, *, kmz: bool = False, **kwargs) -> None:
    """Write one KML (or KMZ) file per group into *outdir* plus a manifest.

    Records are grouped by ``group_id`` (``"ungrouped"`` when missing) and
    files are written in case-folded label order. Each file is named after
    ``safe_filename("<label>__<group_id>")``. ``manifest.json`` lists the
    group id, label, record count and file name of every group.

    Args:
        records: Resolved records.
        outdir: Target directory; created if necessary.
        kmz: Write ``.kmz`` archives instead of plain ``.kml`` files.
        **kwargs: Options forwarded to the writer. ``document_name`` is
            ignored because each file is named after its group label.
    """
    outdir.mkdir(parents=True, exist_ok=True)

    grouped: dict[str, list[dict[str, Any]]] = defaultdict(list)
    labels: dict[str, str] = {}
    for r in records:
        gid = sanitize_text(r.get("group_id") or "ungrouped")
        grouped[gid].append(r)
        labels.setdefault(gid, sanitize_text(r.get("group_label") or gid))

    writer = write_kmz if kmz else write_kml
    extension = "kmz" if kmz else "kml"
    base_kwargs = {k: v for k, v in kwargs.items() if k != "document_name"}

    manifest = []
    # safe_filename() is lossy (character replacement, 120-char cap), so two
    # groups can produce the same stem. Track stems case-insensitively and
    # add a numeric suffix so one group never overwrites another's file.
    used_stems: set[str] = set()
    for gid in sorted(grouped, key=lambda g: labels.get(g, g).casefold()):
        recs = grouped[gid]
        label = labels.get(gid, gid)
        base = safe_filename(f"{label}__{gid}")
        stem = base
        suffix = 2
        while stem.casefold() in used_stems:
            stem = f"{base}_{suffix}"
            suffix += 1
        used_stems.add(stem.casefold())

        path = outdir / f"{stem}.{extension}"
        writer(recs, path, document_name=label, **base_kwargs)
        manifest.append({"group_id": gid, "group_label": label, "count": len(recs), "file": path.name})

    save_json(outdir / "manifest.json", {"groups": manifest, "file_type": extension})
def main(argv=None): def main(argv=None):
ap = argparse.ArgumentParser(description="Stage 2 packager: analyze landmark CSVs into configurable group definitions, then build OSM/GPX with stable hashed names and styles.") ap = argparse.ArgumentParser(description="Stage 2 packager: analyze landmark CSVs into configurable group definitions, then build OSM/GPX with stable hashed names and styles.")
sub = ap.add_subparsers(dest="cmd", required=True) sub = ap.add_subparsers(dest="cmd", required=True)
@@ -1013,6 +1187,16 @@ def main(argv=None):
b.add_argument("--resolved-csv", type=Path) b.add_argument("--resolved-csv", type=Path)
b.add_argument("--osm", type=Path) b.add_argument("--osm", type=Path)
b.add_argument("--gpx", type=Path) b.add_argument("--gpx", type=Path)
b.add_argument("--kml", type=Path, help="Write Google Earth KML with one toggleable Folder per configured group")
b.add_argument("--kmz", type=Path, help="Write Google Earth KMZ with one toggleable Folder per configured group")
b.add_argument("--kml-split-dir", type=Path, help="Write one KML per group plus manifest.json for selective loading")
b.add_argument("--kmz-split-dir", type=Path, help="Write one KMZ per group plus manifest.json for selective loading")
b.add_argument("--kml-document-name", help="Document name shown in Google Earth")
b.add_argument("--kml-folder-visibility", type=int, choices=[0,1], default=1, help="Initial visibility for group folders")
b.add_argument("--kml-folder-open", type=int, choices=[0,1], default=0, help="Initial expanded/collapsed state for group folders")
b.add_argument("--kml-label-scale", type=float, default=0.0, help="Google Earth label scale; 0 hides always-on labels for performance")
b.add_argument("--kml-icon-scale", type=float, default=1.1)
b.add_argument("--kml-no-extended-data", action="store_true", help="Do not write per-point ExtendedData properties")
b.add_argument("--summary-json", type=Path) b.add_argument("--summary-json", type=Path)
b.add_argument("--unmatched-csv", type=Path, help="Write unmatched input landmarks for debugging config misses") b.add_argument("--unmatched-csv", type=Path, help="Write unmatched input landmarks for debugging config misses")
b.add_argument("--interactive-edit-config", action="store_true", help="Review/edit naming and postprocess tags before build") b.add_argument("--interactive-edit-config", action="store_true", help="Review/edit naming and postprocess tags before build")
@@ -1060,7 +1244,7 @@ def main(argv=None):
radius = args.dedupe_radius_m if args.dedupe_radius_m is not None else float(dedupe_cfg.get("radius_m", 12.0)) radius = args.dedupe_radius_m if args.dedupe_radius_m is not None else float(dedupe_cfg.get("radius_m", 12.0))
items = dedupe(items, radius_m=radius, mode=mode) items = dedupe(items, radius_m=radius, mode=mode)
records, unmatched = build_records(items, cfg) records, unmatched = build_records(items, cfg)
if not any([args.resolved_csv, args.osm, args.gpx, args.summary_json, args.unmatched_csv]): if not any([args.resolved_csv, args.osm, args.gpx, args.kml, args.kmz, args.kml_split_dir, args.kmz_split_dir, args.summary_json, args.unmatched_csv]):
print(f"[info] built {len(records)} records") print(f"[info] built {len(records)} records")
print(f"[info] unmatched {len(unmatched)} landmarks") print(f"[info] unmatched {len(unmatched)} landmarks")
return 0 return 0
@@ -1073,6 +1257,26 @@ def main(argv=None):
if args.gpx: if args.gpx:
write_gpx(records, args.gpx) write_gpx(records, args.gpx)
print(f"[info] wrote GPX: {args.gpx}") print(f"[info] wrote GPX: {args.gpx}")
kml_kwargs = {
"document_name": args.kml_document_name,
"folder_visibility": args.kml_folder_visibility,
"folder_open": args.kml_folder_open,
"label_scale": args.kml_label_scale,
"icon_scale": args.kml_icon_scale,
"include_extended_data": not args.kml_no_extended_data,
}
if args.kml:
write_kml(records, args.kml, **kml_kwargs)
print(f"[info] wrote Google Earth KML: {args.kml}")
if args.kmz:
write_kmz(records, args.kmz, **kml_kwargs)
print(f"[info] wrote Google Earth KMZ: {args.kmz}")
if args.kml_split_dir:
write_kml_split_dir(records, args.kml_split_dir, kmz=False, **kml_kwargs)
print(f"[info] wrote per-group Google Earth KML files: {args.kml_split_dir}")
if args.kmz_split_dir:
write_kml_split_dir(records, args.kmz_split_dir, kmz=True, **kml_kwargs)
print(f"[info] wrote per-group Google Earth KMZ files: {args.kmz_split_dir}")
if args.unmatched_csv: if args.unmatched_csv:
write_unmatched_csv(unmatched, args.unmatched_csv) write_unmatched_csv(unmatched, args.unmatched_csv)
print(f"[info] wrote unmatched CSV: {args.unmatched_csv}") print(f"[info] wrote unmatched CSV: {args.unmatched_csv}")

View File

@@ -90,3 +90,92 @@ That gives you exactly the pattern you asked for:
* keep everything in the same config file * keep everything in the same config file
The next strong upgrade would be adding a dedicated `--run-cmd-before-eval-on-enter/--no-auto-open` toggle, so you can choose whether each group auto-opens the first example or only opens when you press `o`. The next strong upgrade would be adding a dedicated `--run-cmd-before-eval-on-enter/--no-auto-open` toggle, so you can choose whether each group auto-opens the first example or only opens when you press `o`.
Implemented it in stage 2.
Download the upgraded script here:
[landmarks_csv_to_osmand_google_earth.py](sandbox:/mnt/data/landmarks_csv_to_osmand_google_earth.py)
I added Google Earth output on top of the existing stage-2 config workflow. Stage 1 already exports the needed `point_group_key`, raw Garmin type, semantic JSON, image metadata, and coordinates into CSV, so stage 2 can reuse that directly. The existing stage-2 logic already resolves records through the config, applying group assignment, naming, style, postprocess tags, and stable hashes, so I hooked Google Earth export after that same resolved-record stage.
New outputs:
```bash
--kml output.kml
--kmz output.kmz
--kml-split-dir google-earth-kml-groups
--kmz-split-dir google-earth-kmz-groups
```
Each KML/KMZ contains:
* one Google Earth **Folder per configured group**
* toggleable components in the Google Earth sidebar
* per-group KML styles generated from your config color/icon
* point name from your stage-2 naming logic
* description table with Garmin/source/hash/config metadata
* `ExtendedData` for machine-readable properties
Example build (the `^` line continuations are for Windows `cmd`; in bash use `\` instead):
```bash
python landmarks_csv_to_osmand_google_earth.py build parsed-landmarks/csv/*.csv ^
--config stage2_config.json ^
--kml bgmountains_landmarks.kml ^
--kmz bgmountains_landmarks.kmz ^
--resolved-csv resolved.csv ^
--summary-json build_summary.json
```
For large datasets, use this instead:
```bash
python landmarks_csv_to_osmand_google_earth.py build parsed-landmarks/csv/*.csv ^
--config stage2_config.json ^
--kmz-split-dir google-earth-groups ^
--kml-folder-visibility 0 ^
--kml-label-scale 0
```
That creates one `.kmz` per group plus:
```text
manifest.json
```
This is better for Google Earth when you have tens of thousands of points, because you can load only the group you are inspecting.
Useful options added:
```bash
--kml-folder-visibility 0
```
Starts all folders hidden, so you enable groups manually in Google Earth.
```bash
--kml-folder-open 0
```
Keeps folders collapsed.
```bash
--kml-label-scale 0
```
Hides always-on labels. This is the default because 100k labels will destroy usability.
```bash
--kml-label-scale 0.7
```
Shows labels if you want them.
```bash
--kml-no-extended-data
```
Makes smaller KML/KMZ files by omitting the per-point `ExtendedData` block. The description table still lists the properties.
The KML export uses the same config-controlled naming/postprocessing path as your existing OSM/GPX generation. The previous stage-2 build command already supported OSM, GPX, resolved CSV, and unmatched CSV outputs; I added KML/KMZ into that same build output branch.