v.5.0.1 - actually adding them
This commit is contained in:
@@ -93,6 +93,90 @@ def read_u32le(buf: bytes, off: int) -> int:
|
|||||||
return int.from_bytes(buf[off:off + 4], "little", signed=False)
|
return int.from_bytes(buf[off:off + 4], "little", signed=False)
|
||||||
|
|
||||||
|
|
||||||
|
def byte_size(val: int) -> int:
    """Return how many bytes (1 to 4) are needed to hold ``val``.

    Values up to 0xFF, 0xFFFF and 0xFFFFFF map to 1, 2 and 3 bytes
    respectively; anything larger is reported as 4.
    """
    for width, limit in enumerate((0xFF, 0xFFFF, 0xFFFFFF), start=1):
        if val <= limit:
            return width
    return 4
|
||||||
|
|
||||||
|
|
||||||
|
def read_vuint32_fixed(data: bytes, pos: int, nbytes: int) -> Tuple[int, int]:
    """Read an unsigned little-endian integer of ``nbytes`` bytes at ``pos``.

    Args:
        data: Buffer to read from.
        pos: Non-negative byte offset of the first byte.
        nbytes: Width of the integer, 1 to 4 bytes.

    Returns:
        ``(value, next_pos)`` where ``next_pos`` is ``pos + nbytes``.

    Raises:
        EOFError: If ``nbytes`` is outside 1..4, ``pos`` is negative, or the
            requested range runs past the end of ``data``.
    """
    # A negative pos would pass the upper-bound check and make the slice wrap
    # around the end of the buffer, yielding a bogus value instead of an error.
    if pos < 0 or not 1 <= nbytes <= 4 or pos + nbytes > len(data):
        raise EOFError(f"cannot read {nbytes} byte(s) at offset {pos}")
    end = pos + nbytes
    return int.from_bytes(data[pos:end], "little", signed=False), end
|
||||||
|
|
||||||
|
|
||||||
|
def read_vuint32_auto(data: bytes, pos: int) -> Tuple[int, int]:
    """Decode a variable-length unsigned integer starting at ``pos``.

    The low bits of the first byte select the layout:

    * bit 0 set           -> 7 payload bits, no extra bytes
    * bits 1..0 == 0b10   -> 6 payload bits, 1 extra byte
    * bits 1..0 == 0b00   -> 5 payload bits, 2 extra bytes when bit 2 is
      set, otherwise 3 extra bytes

    Returns:
        ``(value, next_pos)``.

    Raises:
        EOFError: If the buffer ends before the value is complete.
    """
    if pos >= len(data):
        raise EOFError
    first = data[pos]
    cursor = pos + 1

    if first & 1:
        shift, extra = 7, 0
    elif first & 2:
        shift, extra = 6, 1
    else:
        shift, extra = 5, ((first >> 2) & 1) ^ 3

    # Number of tag bits to drop from the low end of the assembled value.
    drop = 8 - shift
    value = first >> drop
    for index in range(1, extra + 1):
        if cursor >= len(data):
            raise EOFError
        value |= (data[cursor] << (index * 8)) >> drop
        cursor += 1
    return value, cursor
|
||||||
|
|
||||||
|
|
||||||
|
def read_vbitfield32(data: bytes, pos: int) -> Tuple[int, int]:
    """Decode a variable-width bitfield starting at ``pos``.

    The low bits of the first byte act as a width tag and are shifted out of
    the result:

    * bit 0 set             -> 1 byte, 1 tag bit
    * bit 0 clear, bit 1 set -> 2 bytes, 2 tag bits
    * bits 0-1 clear, bit 2 set -> 3 bytes, 3 tag bits
    * bits 0-2 clear        -> 4 bytes, 3 tag bits

    Returns:
        ``(bitfield, next_pos)``.

    Raises:
        EOFError: If the buffer ends before the field is complete.
    """
    if pos >= len(data):
        raise EOFError
    lead = data[pos]

    # Single-byte form needs no further reads.
    if lead & 1:
        return lead >> 1, pos + 1

    if (lead >> 1) & 1:
        width, tag_bits = 2, 2
    elif (lead >> 2) & 1:
        width, tag_bits = 3, 3
    else:
        width, tag_bits = 4, 3

    end = pos + width
    if end > len(data):
        raise EOFError

    if width == 2:
        raw = read_u16le(data, pos)
    elif width == 3:
        raw = read_u24le(data, pos)
    else:
        raw = read_u32le(data, pos)
    return raw >> tag_bits, end
|
||||||
|
|
||||||
|
|
||||||
|
def guess_blob_ext(blob: bytes) -> str:
    """Guess a file extension for ``blob`` from its leading magic bytes.

    Recognises PNG, JPEG, GIF, BMP, TIFF and WebP signatures; anything else
    is reported as ``".bin"``.
    """
    # Checked in order; each entry is (accepted prefixes, extension).
    signatures = (
        ((b"\x89PNG\r\n\x1a\n",), ".png"),
        ((b"\xff\xd8\xff",), ".jpg"),
        ((b"GIF87a", b"GIF89a"), ".gif"),
        ((b"BM",), ".bmp"),
        ((b"II*\x00", b"MM\x00*"), ".tif"),
    )
    for prefixes, extension in signatures:
        if blob.startswith(prefixes):
            return extension
    # WebP is a RIFF container whose form type sits at bytes 8..11.
    if blob[:4] == b"RIFF" and blob[8:12] == b"WEBP":
        return ".webp"
    return ".bin"
|
||||||
|
|
||||||
|
|
||||||
def to_deg(coord: int) -> float:
    """Scale a raw map coordinate by ``COORD_FACTOR`` and return the result."""
    degrees = coord * COORD_FACTOR
    return degrees
|
||||||
|
|
||||||
@@ -268,90 +352,184 @@ class LBL:
|
|||||||
def __init__(self, data: Optional[bytes]):
|
def __init__(self, data: Optional[bytes]):
|
||||||
self.data = data or b""
|
self.data = data or b""
|
||||||
self.ok = bool(data)
|
self.ok = bool(data)
|
||||||
self.data_offset = 0
|
self.header_length = 0
|
||||||
self.data_length = 0
|
self.base_offset = 0
|
||||||
|
self.base_size = 0
|
||||||
|
self.shift = 0
|
||||||
self.data_offset_multiplier = 1
|
self.data_offset_multiplier = 1
|
||||||
self.label_coding = 6
|
self.label_coding = 6
|
||||||
self.codepage = 1252
|
self.codepage = 1252
|
||||||
|
self.poi_offset = 0
|
||||||
|
self.poi_size = 0
|
||||||
|
self.poi_shift = 0
|
||||||
|
self._rasters: List[Tuple[int, int]] = []
|
||||||
|
self._img_offset = 0
|
||||||
|
self._img_size = 0
|
||||||
if self.ok:
|
if self.ok:
|
||||||
self._parse_header()
|
self._parse_header()
|
||||||
|
|
||||||
def _parse_header(self) -> None:
    """Read the fixed-offset header fields of the LBL subfile.

    Populates the label section location (``base_offset``, ``base_size``,
    ``shift``), the label coding and codepage, the POI section location and,
    when the header is long enough, the embedded raster image table.

    Optional fields are only read when ``self.data`` is long enough to hold
    them completely; otherwise they keep their defaults.
    """
    data = self.data
    size = len(data)

    self.header_length = read_u16le(data, 0)
    self.base_offset = read_u32le(data, 0x15)
    self.base_size = read_u32le(data, 0x19)
    self.shift = data[0x1D] if size > 0x1D else 0
    self.data_offset_multiplier = 1 << self.shift
    self.label_coding = data[0x1E] if size > 0x1E else 6

    # The POI offset occupies 0x57..0x5A and the POI size 0x5B..0x5E, so both
    # are only fully present once the buffer holds 0x5F bytes. A smaller
    # threshold lets the size read run off the end and come back truncated.
    if size >= 0x5F:
        self.poi_offset = read_u32le(data, 0x57)
        self.poi_size = read_u32le(data, 0x5B)
    self.poi_shift = data[0x5F] if size > 0x5F else 0

    if size >= 0xAC:
        self.codepage = read_u16le(data, 0xAA)

    # Raster image table: only present in sufficiently long headers.
    if self.header_length >= 0x19A and size >= 0x19C:
        table_offset = read_u32le(data, 0x184)
        table_size = read_u32le(data, 0x188)
        record_size = read_u16le(data, 0x18C)
        self._img_offset = read_u32le(data, 0x194)
        self._img_size = read_u32le(data, 0x198)
        self._load_raster_table(table_offset, table_size, record_size)
|
||||||
|
|
||||||
def get_label(self, offset: int) -> str:
|
def _load_raster_table(self, offset: int, size: int, record_size: int) -> None:
|
||||||
|
if not (offset and size and record_size):
|
||||||
|
return
|
||||||
|
if offset < 0 or offset >= len(self.data) or record_size > 4:
|
||||||
|
return
|
||||||
|
img_count = size // record_size
|
||||||
|
if img_count <= 0 or self._img_offset <= 0 or self._img_offset >= len(self.data):
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
prev, pos = read_vuint32_fixed(self.data, offset, record_size)
|
||||||
|
rasters: List[Tuple[int, int]] = []
|
||||||
|
for _ in range(1, img_count):
|
||||||
|
cur, pos = read_vuint32_fixed(self.data, pos, record_size)
|
||||||
|
if cur < prev:
|
||||||
|
return
|
||||||
|
rasters.append((prev, cur - prev))
|
||||||
|
prev = cur
|
||||||
|
remaining = max(0, min(self._img_size, len(self.data) - self._img_offset) - prev)
|
||||||
|
rasters.append((prev, remaining))
|
||||||
|
self._rasters = rasters
|
||||||
|
except Exception:
|
||||||
|
self._rasters = []
|
||||||
|
|
||||||
|
def has_images(self) -> bool:
|
||||||
|
return bool(self._rasters)
|
||||||
|
|
||||||
|
def image_count(self) -> int:
|
||||||
|
return len(self._rasters)
|
||||||
|
|
||||||
|
def get_image(self, image_id: int) -> bytes:
|
||||||
|
if image_id < 0 or image_id >= len(self._rasters):
|
||||||
|
return b''
|
||||||
|
off, size = self._rasters[image_id]
|
||||||
|
start = self._img_offset + off
|
||||||
|
end = start + size
|
||||||
|
if start < 0 or end > len(self.data) or start >= end:
|
||||||
|
return b''
|
||||||
|
return self.data[start:end]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _sanitize_text(s: str) -> str:
|
||||||
|
s = ''.join(ch for ch in s if ord(ch) >= 0x20 or ch in '\t\n\r')
|
||||||
|
return s.strip().replace('\x00', '')
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _is_all_upper_case(s: str) -> bool:
|
||||||
|
if not s:
|
||||||
|
return False
|
||||||
|
found = False
|
||||||
|
for ch in s:
|
||||||
|
if ch.isalpha():
|
||||||
|
found = True
|
||||||
|
if not ch.isupper():
|
||||||
|
return False
|
||||||
|
return found
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _capitalized(s: str) -> str:
|
||||||
|
if not s:
|
||||||
|
return s
|
||||||
|
out = [s[0]]
|
||||||
|
for i in range(1, len(s)):
|
||||||
|
prev = s[i - 1]
|
||||||
|
c = s[i]
|
||||||
|
out.append(c if (prev.isspace() or prev in '(\"') else c.lower())
|
||||||
|
return ''.join(out)
|
||||||
|
|
||||||
|
def _postprocess(self, text: str, capitalize: bool = True) -> str:
|
||||||
|
text = self._sanitize_text(text)
|
||||||
|
if capitalize and self._is_all_upper_case(text):
|
||||||
|
text = self._capitalized(text)
|
||||||
|
return text
|
||||||
|
|
||||||
|
def get_label(self, offset: int, poi: bool = False, capitalize: bool = True) -> str:
|
||||||
if not self.ok or offset == 0:
|
if not self.ok or offset == 0:
|
||||||
return ""
|
return ""
|
||||||
actual = self.data_offset + offset * self.data_offset_multiplier
|
if poi:
|
||||||
|
entry = self.poi_offset + (offset << self.poi_shift)
|
||||||
|
if entry < 0 or entry + 3 > len(self.data):
|
||||||
|
return ""
|
||||||
|
poi_ptr = read_u24le(self.data, entry) & 0x3FFFFF
|
||||||
|
actual = self.base_offset + (poi_ptr << self.shift)
|
||||||
|
else:
|
||||||
|
actual = self.base_offset + (offset << self.shift)
|
||||||
if actual < 0 or actual >= len(self.data):
|
if actual < 0 or actual >= len(self.data):
|
||||||
return ""
|
return ""
|
||||||
if self.label_coding == 6:
|
if self.label_coding == 6:
|
||||||
return self._get_label6(actual)
|
return self._get_label6(actual, capitalize=capitalize)
|
||||||
return self._get_label8_10(actual)
|
return self._get_label8_10(actual, capitalize=capitalize)
|
||||||
|
|
||||||
def _get_label8_10(self, off: int) -> str:
|
def _decode_bytes(self, raw: bytes) -> str:
|
||||||
|
cp = self.codepage
|
||||||
|
if cp in (0, 850):
|
||||||
|
enc = 'cp1252'
|
||||||
|
elif cp == 65001:
|
||||||
|
enc = 'utf-8'
|
||||||
|
elif cp == 932:
|
||||||
|
enc = 'cp932'
|
||||||
|
elif cp == 950:
|
||||||
|
enc = 'big5'
|
||||||
|
else:
|
||||||
|
enc = f'cp{cp}'
|
||||||
|
try:
|
||||||
|
return raw.decode(enc, errors='replace')
|
||||||
|
except Exception:
|
||||||
|
return raw.decode('latin1', errors='replace')
|
||||||
|
|
||||||
|
def _get_label8_10(self, off: int, capitalize: bool = True) -> str:
|
||||||
end = off
|
end = off
|
||||||
while end < len(self.data) and self.data[end] != 0:
|
while end < len(self.data) and self.data[end] != 0:
|
||||||
end += 1
|
end += 1
|
||||||
raw = self.data[off:end]
|
raw = self.data[off:end]
|
||||||
enc = None
|
return self._postprocess(self._decode_bytes(raw), capitalize=capitalize)
|
||||||
cp = self.codepage
|
|
||||||
if cp in (0, 850):
|
|
||||||
enc = "cp1252"
|
|
||||||
elif cp == 65001:
|
|
||||||
enc = "utf-8"
|
|
||||||
elif cp == 932:
|
|
||||||
enc = "cp932"
|
|
||||||
elif cp == 950:
|
|
||||||
enc = "big5"
|
|
||||||
else:
|
|
||||||
enc = f"cp{cp}"
|
|
||||||
try:
|
|
||||||
return raw.decode(enc, errors="replace")
|
|
||||||
except Exception:
|
|
||||||
return raw.decode("latin1", errors="replace")
|
|
||||||
|
|
||||||
def _get_label6(self, off: int) -> str:
|
def _get_label6(self, off: int, capitalize: bool = True) -> str:
|
||||||
out: List[str] = []
|
out: List[str] = []
|
||||||
charset = "NORMAL"
|
charset = 'NORMAL'
|
||||||
pos = off
|
pos = off
|
||||||
while pos + 3 <= len(self.data):
|
while pos + 3 <= len(self.data):
|
||||||
b1, b2, b3 = self.data[pos], self.data[pos + 1], self.data[pos + 2]
|
b1, b2, b3 = self.data[pos], self.data[pos + 1], self.data[pos + 2]
|
||||||
pos += 3
|
pos += 3
|
||||||
codes = [
|
codes = [b1 >> 2, ((b1 & 0x3) << 4) | (b2 >> 4), ((b2 & 0xF) << 2) | (b3 >> 6), b3 & 0x3F]
|
||||||
b1 >> 2,
|
|
||||||
((b1 & 0x3) << 4) | (b2 >> 4),
|
|
||||||
((b2 & 0xF) << 2) | (b3 >> 6),
|
|
||||||
b3 & 0x3F,
|
|
||||||
]
|
|
||||||
for c in codes:
|
for c in codes:
|
||||||
if c > 0x2F:
|
if c > 0x2F or (charset == 'NORMAL' and c == 0x1D):
|
||||||
return "".join(out).strip()
|
return self._postprocess(''.join(out), capitalize=capitalize)
|
||||||
if charset == "NORMAL":
|
if charset == 'NORMAL':
|
||||||
if c == 0x1C:
|
if c == 0x1C:
|
||||||
charset = "SYMBOL"
|
charset = 'SYMBOL'
|
||||||
elif c == 0x1B:
|
elif c == 0x1B:
|
||||||
charset = "SPECIAL"
|
charset = 'SPECIAL'
|
||||||
elif c == 0x1D:
|
|
||||||
out.append("|")
|
|
||||||
elif c in (0x1E, 0x1F):
|
elif c in (0x1E, 0x1F):
|
||||||
out.append(" ")
|
out.append(' ')
|
||||||
else:
|
else:
|
||||||
out.append(self.NORMAL_CHARS[c])
|
out.append(self.NORMAL_CHARS[c])
|
||||||
elif charset == "SYMBOL":
|
elif charset == 'SYMBOL':
|
||||||
out.append(self.SYMBOL_CHARS[c])
|
out.append(self.SYMBOL_CHARS[c])
|
||||||
charset = "NORMAL"
|
charset = 'NORMAL'
|
||||||
else:
|
else:
|
||||||
out.append(self.SPECIAL_CHARS[c])
|
out.append(self.SPECIAL_CHARS[c])
|
||||||
charset = "NORMAL"
|
charset = 'NORMAL'
|
||||||
return "".join(out).strip()
|
return self._postprocess(''.join(out), capitalize=capitalize)
|
||||||
|
|
||||||
|
|
||||||
# -------------------------
|
# -------------------------
|
||||||
# TRE parser
|
# TRE parser
|
||||||
@@ -628,6 +806,86 @@ class RGN:
|
|||||||
self.ext_line_length = read_u32le(data, 0x3D) if len(data) >= 0x41 else 0
|
self.ext_line_length = read_u32le(data, 0x3D) if len(data) >= 0x41 else 0
|
||||||
self.ext_poi_offset = read_u32le(data, 0x55) if len(data) >= 0x5D else 0
|
self.ext_poi_offset = read_u32le(data, 0x55) if len(data) >= 0x5D else 0
|
||||||
self.ext_poi_length = read_u32le(data, 0x59) if len(data) >= 0x5D else 0
|
self.ext_poi_length = read_u32le(data, 0x59) if len(data) >= 0x5D else 0
|
||||||
|
self.polygons_gbl_flags = 0
|
||||||
|
self.polygons_lcl_flags = [0, 0, 0]
|
||||||
|
self.lines_gbl_flags = 0
|
||||||
|
self.lines_lcl_flags = [0, 0, 0]
|
||||||
|
self.points_gbl_flags = 0
|
||||||
|
self.points_lcl_flags = [0, 0, 0]
|
||||||
|
if self.header_length >= 0x71 and len(data) >= 0x71:
|
||||||
|
try:
|
||||||
|
self.polygons_gbl_flags = read_u32le(data, 0x29)
|
||||||
|
self.polygons_lcl_flags = [read_u32le(data, 0x2D), read_u32le(data, 0x31), read_u32le(data, 0x35)]
|
||||||
|
self.lines_gbl_flags = read_u32le(data, 0x45)
|
||||||
|
self.lines_lcl_flags = [read_u32le(data, 0x49), read_u32le(data, 0x4D), read_u32le(data, 0x51)]
|
||||||
|
self.points_gbl_flags = read_u32le(data, 0x61)
|
||||||
|
self.points_lcl_flags = [read_u32le(data, 0x65), read_u32le(data, 0x69), read_u32le(data, 0x6D)]
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
self.segment_stats: Counter[str] = Counter()
|
||||||
|
self.segment_errors: List[Dict[str, object]] = []
|
||||||
|
|
||||||
|
def _skip_global_fields(self, pos: int, flags: int) -> int:
|
||||||
|
cnt = 0
|
||||||
|
while flags:
|
||||||
|
cnt += (flags & 3)
|
||||||
|
flags >>= 2
|
||||||
|
return min(len(self.data), pos + cnt)
|
||||||
|
|
||||||
|
def _skip_class_fields(self, pos: int) -> int:
|
||||||
|
if pos >= len(self.data):
|
||||||
|
return pos
|
||||||
|
flags = self.data[pos]
|
||||||
|
pos += 1
|
||||||
|
size_mode = flags >> 5
|
||||||
|
if size_mode == 4:
|
||||||
|
rs = 1
|
||||||
|
elif size_mode == 5:
|
||||||
|
rs = 2
|
||||||
|
elif size_mode == 6:
|
||||||
|
rs = 3
|
||||||
|
elif size_mode == 7:
|
||||||
|
try:
|
||||||
|
rs, pos = read_vuint32_auto(self.data, pos)
|
||||||
|
except Exception:
|
||||||
|
return pos
|
||||||
|
else:
|
||||||
|
rs = 0
|
||||||
|
return min(len(self.data), pos + rs)
|
||||||
|
|
||||||
|
def _parse_local_fields_image(self, pos: int, flags: List[int]) -> Tuple[int, Optional[int]]:
|
||||||
|
image_id: Optional[int] = None
|
||||||
|
bitfield = 0xFFFFFFFF
|
||||||
|
if flags[0] & 0x20000000:
|
||||||
|
try:
|
||||||
|
bitfield, pos = read_vbitfield32(self.data, pos)
|
||||||
|
except Exception:
|
||||||
|
return pos, image_id
|
||||||
|
j = 0
|
||||||
|
for i in range(29):
|
||||||
|
if (flags[0] >> i) & 1:
|
||||||
|
if bitfield & 1:
|
||||||
|
m = (flags[(j >> 4) + 1] >> ((j * 2) & 0x1E)) & 3
|
||||||
|
if m == 3:
|
||||||
|
try:
|
||||||
|
size, pos = read_vuint32_auto(self.data, pos)
|
||||||
|
except Exception:
|
||||||
|
return pos, image_id
|
||||||
|
else:
|
||||||
|
size = m + 1
|
||||||
|
off = pos
|
||||||
|
if i == 3:
|
||||||
|
try:
|
||||||
|
if size == 1 and pos + 1 <= len(self.data):
|
||||||
|
image_id = self.data[pos]
|
||||||
|
elif size == 2 and pos + 2 <= len(self.data):
|
||||||
|
image_id = read_u16le(self.data, pos)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
pos = min(len(self.data), off + size)
|
||||||
|
bitfield >>= 1
|
||||||
|
j += 1
|
||||||
|
return pos, image_id
|
||||||
|
|
||||||
def data_end(self) -> int:
|
def data_end(self) -> int:
|
||||||
return self.data_length
|
return self.data_length
|
||||||
@@ -758,61 +1016,77 @@ class RGN:
|
|||||||
feats: List[Feature] = []
|
feats: List[Feature] = []
|
||||||
pos, end = seg
|
pos, end = seg
|
||||||
while pos < end and pos + 8 <= len(self.data):
|
while pos < end and pos + 8 <= len(self.data):
|
||||||
typ = self.data[pos]
|
try:
|
||||||
info24 = read_u24le(self.data, pos + 1)
|
typ = self.data[pos]
|
||||||
has_subtype = bool(info24 & 0x800000)
|
info24 = read_u24le(self.data, pos + 1)
|
||||||
is_poi = bool(info24 & 0x400000)
|
has_subtype = bool(info24 & 0x800000)
|
||||||
lbl_off = info24 & 0x3FFFFF
|
is_poi = bool(info24 & 0x400000)
|
||||||
lon_delta = read_s16le(self.data, pos + 4)
|
lbl_off = info24 & 0x3FFFFF
|
||||||
lat_delta = read_s16le(self.data, pos + 6)
|
lon_delta = read_s16le(self.data, pos + 4)
|
||||||
pos += 8
|
lat_delta = read_s16le(self.data, pos + 6)
|
||||||
subtype = 0
|
pos += 8
|
||||||
if has_subtype and pos < end:
|
subtype = 0
|
||||||
subtype = self.data[pos]
|
if has_subtype and pos < end:
|
||||||
pos += 1
|
subtype = self.data[pos]
|
||||||
name = self.lbl.get_label(lbl_off) if lbl_off else ""
|
pos += 1
|
||||||
lon = to_deg(self._subdiv_lon(sub, lon_delta, 0))
|
name = self.lbl.get_label(lbl_off, poi=is_poi, capitalize=not (0x1400 <= (typ << 8 | subtype) <= 0x153F)) if lbl_off else ""
|
||||||
lat = to_deg(self._subdiv_lat(sub, lat_delta, 0))
|
lon = to_deg(self._subdiv_lon(sub, lon_delta, 0))
|
||||||
feats.append(Feature(
|
lat = to_deg(self._subdiv_lat(sub, lat_delta, 0))
|
||||||
geom_type="Point",
|
feats.append(Feature(
|
||||||
coords=[lon, lat],
|
geom_type="Point",
|
||||||
props={
|
coords=[lon, lat],
|
||||||
"garmin_kind": "indexed_point" if indexed else "point",
|
props={
|
||||||
"garmin_type": f"0x{typ:02x}",
|
"garmin_kind": "indexed_point" if indexed else "point",
|
||||||
"garmin_subtype": f"0x{subtype:02x}",
|
"garmin_type": f"0x{typ:02x}",
|
||||||
"garmin_is_poi": is_poi,
|
"garmin_subtype": f"0x{subtype:02x}",
|
||||||
"name": name,
|
"garmin_is_poi": is_poi,
|
||||||
},
|
"name": name,
|
||||||
))
|
},
|
||||||
|
))
|
||||||
|
except Exception as e:
|
||||||
|
self.segment_errors.append({"segment": "point", "subdivision": sub.index, "offset": pos, "error": str(e)})
|
||||||
|
break
|
||||||
|
self.segment_stats['point' if not indexed else 'indexed_point'] += len(feats)
|
||||||
return feats
|
return feats
|
||||||
|
|
||||||
def _parse_ext_points(self, sub: Subdivision, seg: Tuple[int, int]) -> List[Feature]:
    """Parse the extended point records in ``seg`` into Point features.

    Each record is a type byte, a subtype/flags byte, two signed 16-bit
    coordinate deltas and then optional parts selected by the flags byte:
    0x20 a 3-byte label pointer, 0x80 class fields, 0x40 local fields
    (which may carry an image id). Global fields follow when the file
    declares any for points.

    Parsing of the segment stops at the first record that raises; the
    error is recorded in ``self.segment_errors`` and the features decoded
    so far are returned.

    Args:
        sub: Subdivision the segment belongs to.
        seg: ``(start, end)`` byte range of the segment in ``self.data``.

    Returns:
        The decoded features; their count is added to
        ``self.segment_stats['extended_point']``.
    """
    feats: List[Feature] = []
    pos, end = seg
    while pos < end and pos + 6 <= len(self.data):
        try:
            typ = self.data[pos]
            subtype_raw = self.data[pos + 1]
            has_lbl = bool(subtype_raw & 0x20)
            subtype = subtype_raw % 32
            full_type = ((typ + 0x100) << 8) + subtype
            lon_delta = read_s16le(self.data, pos + 2)
            lat_delta = read_s16le(self.data, pos + 4)
            pos += 6

            # NOTE(review): the regular point parser masks its label pointer
            # with 0x3FFFFF; here the raw 24-bit value is used - confirm
            # whether the top bits can ever be set for extended points.
            lbl_off = 0
            if has_lbl:
                if pos + 3 <= end:
                    lbl_off = read_u24le(self.data, pos)
                # The pointer bytes are skipped even when they were not read.
                pos += 3

            if subtype_raw & 0x80:
                pos = self._skip_class_fields(pos)

            image_id = None
            if subtype_raw & 0x40:
                pos, image_id = self._parse_local_fields_image(pos, self.points_lcl_flags)

            if self.points_gbl_flags:
                pos = self._skip_global_fields(pos, self.points_gbl_flags)

            name = self.lbl.get_label(lbl_off) if lbl_off else ""
            lon = to_deg(self._subdiv_lon(sub, lon_delta, 0))
            lat = to_deg(self._subdiv_lat(sub, lat_delta, 0))

            props = {
                "garmin_kind": "extended_point",
                "garmin_type": f"0x{full_type:04x}",
                "name": name,
            }
            if image_id is not None:
                props["garmin_image_id"] = image_id
            feats.append(Feature(geom_type="Point", coords=[lon, lat], props=props))
        except Exception as e:
            self.segment_errors.append({
                "segment": "extended_point",
                "subdivision": sub.index,
                "offset": pos,
                "error": str(e),
            })
            break
    self.segment_stats['extended_point'] += len(feats)
    return feats
|
||||||
|
|
||||||
def _parse_poly(self, sub: Subdivision, seg: Tuple[int, int], line: bool, extended: bool) -> List[Feature]:
|
def _parse_poly(self, sub: Subdivision, seg: Tuple[int, int], line: bool, extended: bool) -> List[Feature]:
|
||||||
@@ -933,9 +1207,10 @@ class RGN:
|
|||||||
"name": name,
|
"name": name,
|
||||||
},
|
},
|
||||||
))
|
))
|
||||||
except Exception:
|
except Exception as e:
|
||||||
# Stop current segment on malformed data instead of crashing the whole file.
|
self.segment_errors.append({"segment": ("extended_" if extended else "") + ("polyline" if line else "polygon"), "subdivision": sub.index, "offset": pos, "error": str(e)})
|
||||||
break
|
break
|
||||||
|
self.segment_stats[("extended_" if extended else "") + ("polyline" if line else "polygon")] += len(feats)
|
||||||
return feats
|
return feats
|
||||||
|
|
||||||
|
|
||||||
@@ -1358,11 +1633,34 @@ def _node_key(lon: float, lat: float) -> Tuple[int, int]:
|
|||||||
return (int(round(lon * 1e7)), int(round(lat * 1e7)))
|
return (int(round(lon * 1e7)), int(round(lat * 1e7)))
|
||||||
|
|
||||||
|
|
||||||
def parse_mapset_features(mapset_name: str, subfiles: Dict[str, bytes]) -> Tuple[List[Feature], Dict[str, object]]:
|
def dump_lbl_images(lbl: LBL, mapset_name: str, outdir: Path) -> Dict[int, str]:
    """Write every embedded image of ``lbl`` to ``outdir / mapset_name``.

    Files are named ``image_<id, five digits><ext>`` with the extension
    guessed from the image content. Empty images are skipped, and the
    target directory is only created when the LBL file has images at all.

    Returns:
        Mapping of image id to the written file's path relative to
        ``outdir`` (``mapset_name/<file name>``).
    """
    mapping: Dict[int, str] = {}
    if lbl.has_images():
        target = outdir / mapset_name
        target.mkdir(parents=True, exist_ok=True)
        for image_id in range(lbl.image_count()):
            blob = lbl.get_image(image_id)
            if blob:
                name = f'image_{image_id:05d}{guess_blob_ext(blob)}'
                (target / name).write_bytes(blob)
                mapping[image_id] = str(Path(mapset_name) / name)
    return mapping
|
||||||
|
|
||||||
|
|
||||||
|
def parse_mapset_features(mapset_name: str, subfiles: Dict[str, bytes], extract_images_dir: Optional[Path] = None) -> Tuple[List[Feature], Dict[str, object]]:
|
||||||
tre = TRE(subfiles['TRE'])
|
tre = TRE(subfiles['TRE'])
|
||||||
lbl = LBL(subfiles.get('LBL'))
|
lbl = LBL(subfiles.get('LBL'))
|
||||||
rgn = RGN(subfiles['RGN'], tre=tre, lbl=lbl)
|
rgn = RGN(subfiles['RGN'], tre=tre, lbl=lbl)
|
||||||
features = rgn.parse_features()
|
features = rgn.parse_features()
|
||||||
|
image_files = dump_lbl_images(lbl, mapset_name, extract_images_dir) if extract_images_dir else {}
|
||||||
|
for f in features:
|
||||||
|
f.props['mapset'] = mapset_name
|
||||||
|
image_id = f.props.get('garmin_image_id')
|
||||||
|
if image_id is not None and image_id in image_files:
|
||||||
|
f.props['garmin_image_file'] = image_files[image_id]
|
||||||
meta = {
|
meta = {
|
||||||
'mapset': mapset_name,
|
'mapset': mapset_name,
|
||||||
'bounds_wgs84': {
|
'bounds_wgs84': {
|
||||||
@@ -1373,6 +1671,11 @@ def parse_mapset_features(mapset_name: str, subfiles: Dict[str, bytes]) -> Tuple
|
|||||||
},
|
},
|
||||||
'feature_count': len(features),
|
'feature_count': len(features),
|
||||||
'levels': {lvl: {'bits_per_coord': li.bits_per_coord, 'inherited': li.inherited} for lvl, li in tre.levels.items()},
|
'levels': {lvl: {'bits_per_coord': li.bits_per_coord, 'inherited': li.inherited} for lvl, li in tre.levels.items()},
|
||||||
|
'segment_stats': dict(rgn.segment_stats),
|
||||||
|
'segment_error_count': len(rgn.segment_errors),
|
||||||
|
'segment_errors_preview': rgn.segment_errors[:20],
|
||||||
|
'embedded_image_count': lbl.image_count(),
|
||||||
|
'dumped_images': image_files,
|
||||||
}
|
}
|
||||||
return features, meta
|
return features, meta
|
||||||
|
|
||||||
@@ -1477,7 +1780,7 @@ def write_osm(features: List[Feature], path: Path, semantic: bool = True) -> Non
|
|||||||
|
|
||||||
def write_osm_from_img(img_path: Path, path: Path, mapsets: Optional[List[str]] = None,
|
def write_osm_from_img(img_path: Path, path: Path, mapsets: Optional[List[str]] = None,
|
||||||
bbox: Optional[Tuple[float, float, float, float]] = None,
|
bbox: Optional[Tuple[float, float, float, float]] = None,
|
||||||
semantic: bool = True) -> Dict[str, object]:
|
semantic: bool = True, extract_images_dir: Optional[Path] = None) -> Dict[str, object]:
|
||||||
raw = img_path.read_bytes()
|
raw = img_path.read_bytes()
|
||||||
container = ImgContainer(raw)
|
container = ImgContainer(raw)
|
||||||
all_sets = _all_mapsets(container.files)
|
all_sets = _all_mapsets(container.files)
|
||||||
@@ -1493,7 +1796,7 @@ def write_osm_from_img(img_path: Path, path: Path, mapsets: Optional[List[str]]
|
|||||||
for name, subs in all_sets.items():
|
for name, subs in all_sets.items():
|
||||||
if selected and name.upper() not in selected:
|
if selected and name.upper() not in selected:
|
||||||
continue
|
continue
|
||||||
feats, meta = parse_mapset_features(name, subs)
|
feats, meta = parse_mapset_features(name, subs, extract_images_dir=extract_images_dir)
|
||||||
if bbox is not None:
|
if bbox is not None:
|
||||||
feats = [f for f in feats if _intersects_bbox(f, bbox)]
|
feats = [f for f in feats if _intersects_bbox(f, bbox)]
|
||||||
meta['feature_count_after_bbox'] = len(feats)
|
meta['feature_count_after_bbox'] = len(feats)
|
||||||
@@ -1518,6 +1821,7 @@ def load_features_from_img(
|
|||||||
img_path: Path,
|
img_path: Path,
|
||||||
mapsets: Optional[List[str]] = None,
|
mapsets: Optional[List[str]] = None,
|
||||||
bbox: Optional[Tuple[float, float, float, float]] = None,
|
bbox: Optional[Tuple[float, float, float, float]] = None,
|
||||||
|
extract_images_dir: Optional[Path] = None,
|
||||||
) -> Tuple[List[Feature], Dict[str, object]]:
|
) -> Tuple[List[Feature], Dict[str, object]]:
|
||||||
raw = img_path.read_bytes()
|
raw = img_path.read_bytes()
|
||||||
container = ImgContainer(raw)
|
container = ImgContainer(raw)
|
||||||
@@ -1528,7 +1832,7 @@ def load_features_from_img(
|
|||||||
for name, subs in all_sets.items():
|
for name, subs in all_sets.items():
|
||||||
if selected and name.upper() not in selected:
|
if selected and name.upper() not in selected:
|
||||||
continue
|
continue
|
||||||
feats, meta = parse_mapset_features(name, subs)
|
feats, meta = parse_mapset_features(name, subs, extract_images_dir=extract_images_dir)
|
||||||
if bbox is not None:
|
if bbox is not None:
|
||||||
feats = [f for f in feats if _intersects_bbox(f, bbox)]
|
feats = [f for f in feats if _intersects_bbox(f, bbox)]
|
||||||
meta['feature_count_after_bbox'] = len(feats)
|
meta['feature_count_after_bbox'] = len(feats)
|
||||||
@@ -1630,19 +1934,22 @@ def _feature_point_row(f: Feature) -> Dict[str, object]:
|
|||||||
sem = semantic_tags_for_feature(f)
|
sem = semantic_tags_for_feature(f)
|
||||||
lon, lat = f.coords
|
lon, lat = f.coords
|
||||||
return {
|
return {
|
||||||
|
'mapset': f.props.get('mapset', ''),
|
||||||
'lon': lon,
|
'lon': lon,
|
||||||
'lat': lat,
|
'lat': lat,
|
||||||
'name': sem.get('name', ''),
|
'name': sem.get('name', ''),
|
||||||
'garmin_kind': f.props.get('garmin_kind', ''),
|
'garmin_kind': f.props.get('garmin_kind', ''),
|
||||||
'garmin_type': f.props.get('garmin_type', ''),
|
'garmin_type': f.props.get('garmin_type', ''),
|
||||||
'garmin_subtype': f.props.get('garmin_subtype', ''),
|
'garmin_subtype': f.props.get('garmin_subtype', ''),
|
||||||
|
'garmin_image_id': f.props.get('garmin_image_id', ''),
|
||||||
|
'garmin_image_file': f.props.get('garmin_image_file', ''),
|
||||||
'semantic_tags': sem,
|
'semantic_tags': sem,
|
||||||
'gpxsee_classes': gpxsee_classes_for_feature(f),
|
'gpxsee_classes': gpxsee_classes_for_feature(f),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def write_landmarks_csv(features: List[Feature], path: Path) -> None:
|
def write_landmarks_csv(features: List[Feature], path: Path) -> None:
|
||||||
fields = ['lon', 'lat', 'name', 'garmin_kind', 'garmin_type', 'garmin_subtype', 'gpxsee_classes_json', 'semantic_tags_json']
|
fields = ['mapset', 'lon', 'lat', 'name', 'garmin_kind', 'garmin_type', 'garmin_subtype', 'garmin_image_id', 'garmin_image_file', 'gpxsee_classes_json', 'semantic_tags_json']
|
||||||
if str(path).lower().endswith('.gz'):
|
if str(path).lower().endswith('.gz'):
|
||||||
fh = gzip.open(path, 'wt', encoding='utf-8', newline='')
|
fh = gzip.open(path, 'wt', encoding='utf-8', newline='')
|
||||||
else:
|
else:
|
||||||
@@ -1653,12 +1960,15 @@ def write_landmarks_csv(features: List[Feature], path: Path) -> None:
|
|||||||
for f in features:
|
for f in features:
|
||||||
row = _feature_point_row(f)
|
row = _feature_point_row(f)
|
||||||
w.writerow({
|
w.writerow({
|
||||||
|
'mapset': row['mapset'],
|
||||||
'lon': f'{row["lon"]:.8f}',
|
'lon': f'{row["lon"]:.8f}',
|
||||||
'lat': f'{row["lat"]:.8f}',
|
'lat': f'{row["lat"]:.8f}',
|
||||||
'name': row['name'],
|
'name': row['name'],
|
||||||
'garmin_kind': row['garmin_kind'],
|
'garmin_kind': row['garmin_kind'],
|
||||||
'garmin_type': row['garmin_type'],
|
'garmin_type': row['garmin_type'],
|
||||||
'garmin_subtype': row['garmin_subtype'],
|
'garmin_subtype': row['garmin_subtype'],
|
||||||
|
'garmin_image_id': row['garmin_image_id'],
|
||||||
|
'garmin_image_file': row['garmin_image_file'],
|
||||||
'gpxsee_classes_json': json.dumps(row['gpxsee_classes'], ensure_ascii=False),
|
'gpxsee_classes_json': json.dumps(row['gpxsee_classes'], ensure_ascii=False),
|
||||||
'semantic_tags_json': json.dumps(row['semantic_tags'], ensure_ascii=False, sort_keys=True),
|
'semantic_tags_json': json.dumps(row['semantic_tags'], ensure_ascii=False, sort_keys=True),
|
||||||
})
|
})
|
||||||
@@ -1672,10 +1982,13 @@ def write_landmarks_geojson(features: List[Feature], path: Path) -> None:
|
|||||||
for f in features:
|
for f in features:
|
||||||
row = _feature_point_row(f)
|
row = _feature_point_row(f)
|
||||||
props = {
|
props = {
|
||||||
|
'mapset': row['mapset'],
|
||||||
'name': row['name'],
|
'name': row['name'],
|
||||||
'garmin_kind': row['garmin_kind'],
|
'garmin_kind': row['garmin_kind'],
|
||||||
'garmin_type': row['garmin_type'],
|
'garmin_type': row['garmin_type'],
|
||||||
'garmin_subtype': row['garmin_subtype'],
|
'garmin_subtype': row['garmin_subtype'],
|
||||||
|
'garmin_image_id': row['garmin_image_id'],
|
||||||
|
'garmin_image_file': row['garmin_image_file'],
|
||||||
'gpxsee_classes': ','.join(row['gpxsee_classes']),
|
'gpxsee_classes': ','.join(row['gpxsee_classes']),
|
||||||
}
|
}
|
||||||
props.update(row['semantic_tags'])
|
props.update(row['semantic_tags'])
|
||||||
@@ -1707,6 +2020,46 @@ def print_feature_type_table(features: List[Feature], point_only: bool = False)
|
|||||||
str(row['sample_name']),
|
str(row['sample_name']),
|
||||||
]))
|
]))
|
||||||
|
|
||||||
|
|
||||||
|
def image_group_rows(features: List[Feature]) -> List[Dict[str, object]]:
    """Summarise features by (mapset, Garmin image id).

    Features whose ``garmin_image_id`` property is ``None`` are ignored.
    Each returned row carries the mapset, the image id, the number of
    features in the group, the image file / type / subtype taken from the
    first feature seen for that group, and the first non-empty name found
    among the group's features.

    Rows are ordered by descending count, then mapset, then image id.
    Numeric image ids sort by integer value; ids that cannot be converted
    with ``int()`` sort after the numeric ones, by their string form,
    instead of aborting the whole listing with ``ValueError``.
    """
    groups: Dict[Tuple[str, str], Dict[str, object]] = {}
    for f in features:
        props = f.props
        image_id = props.get('garmin_image_id')
        if image_id is None:
            continue
        key = (str(props.get('mapset') or ''), str(image_id))
        g = groups.get(key)
        if g is None:
            # Build the row only for the first feature of a group rather
            # than constructing a throwaway default dict for every feature.
            g = groups[key] = {
                'mapset': key[0],
                'garmin_image_id': image_id,
                'count': 0,
                'sample_name': '',
                'sample_type': str(props.get('garmin_type') or ''),
                'sample_subtype': str(props.get('garmin_subtype') or ''),
                'garmin_image_file': str(props.get('garmin_image_file') or ''),
            }
        g['count'] += 1
        if not g['sample_name']:
            # Fall back to the semantic-tag name only when the raw name is empty.
            g['sample_name'] = str(props.get('name') or semantic_tags_for_feature(f).get('name') or '')

    def _order(row: Dict[str, object]):
        raw_id = row['garmin_image_id']
        try:
            id_rank = (0, int(raw_id), '')
        except (TypeError, ValueError):
            id_rank = (1, 0, str(raw_id))
        return (-row['count'], row['mapset'], id_rank)

    return sorted(groups.values(), key=_order)
|
||||||
|
|
||||||
|
|
||||||
|
def print_image_group_table(features: List[Feature]) -> None:
    """Print the image-group summary for *features* to stdout.

    A header line is printed first, followed by one line per row returned
    by ``image_group_rows``, with every value converted via ``str``.
    """
    rows = image_group_rows(features)
    columns = (
        'mapset',
        'garmin_image_id',
        'count',
        'garmin_image_file',
        'sample_type',
        'sample_subtype',
        'sample_name',
    )
    print('mapset garmin_image_id count garmin_image_file sample_type sample_subtype sample_name')
    for entry in rows:
        cells = [str(entry[column]) for column in columns]
        print(' '.join(cells))
|
||||||
|
|
||||||
|
|
||||||
|
def write_image_groups_csv(features: List[Feature], path: Path) -> None:
    """Write the image-group summary for *features* to *path* as CSV.

    The output is gzip-compressed when the path ends in ".gz"
    (case-insensitive); otherwise it is written as plain UTF-8 text.
    """
    rows = image_group_rows(features)
    fields = ['mapset','garmin_image_id','count','garmin_image_file','sample_type','sample_subtype','sample_name']
    if str(path).lower().endswith('.gz'):
        fh = gzip.open(path, 'wt', encoding='utf-8', newline='')
    else:
        fh = open(path, 'w', encoding='utf-8', newline='')
    with fh:
        writer = csv.DictWriter(fh, fieldnames=fields)
        writer.writeheader()
        writer.writerows(rows)
|
||||||
|
|
||||||
def main() -> int:
|
def main() -> int:
|
||||||
ap = argparse.ArgumentParser(description='Extract vector features from a Garmin IMG and export GeoJSON / OSM XML suitable for further conversion to OsmAnd .obf.')
|
ap = argparse.ArgumentParser(description='Extract vector features from a Garmin IMG and export GeoJSON / OSM XML suitable for further conversion to OsmAnd .obf.')
|
||||||
ap.add_argument('img', type=Path, help='Input Garmin .img file')
|
ap.add_argument('img', type=Path, help='Input Garmin .img file')
|
||||||
@@ -1722,6 +2075,9 @@ def main() -> int:
|
|||||||
ap.add_argument('--landmark-types-json', type=Path, help='Export landmark type summary table to JSON or JSON.GZ')
|
ap.add_argument('--landmark-types-json', type=Path, help='Export landmark type summary table to JSON or JSON.GZ')
|
||||||
ap.add_argument('--landmarks-csv', type=Path, help='Export exact-coordinate point landmarks to CSV or CSV.GZ')
|
ap.add_argument('--landmarks-csv', type=Path, help='Export exact-coordinate point landmarks to CSV or CSV.GZ')
|
||||||
ap.add_argument('--landmarks-geojson', type=Path, help='Export exact-coordinate point landmarks to GeoJSON or GeoJSON.GZ')
|
ap.add_argument('--landmarks-geojson', type=Path, help='Export exact-coordinate point landmarks to GeoJSON or GeoJSON.GZ')
|
||||||
|
ap.add_argument('--list-image-groups', action='store_true', help='List extracted point image/icon groups with counts')
|
||||||
|
ap.add_argument('--image-groups-csv', type=Path, help='Export extracted point image/icon groups to CSV or CSV.GZ')
|
||||||
|
ap.add_argument('--extract-images-dir', type=Path, help='Best-effort dump of embedded Garmin image blobs by mapset')
|
||||||
ap.add_argument('--category', action='append', help='Filter landmarks/features by semantic category: water_sources, peaks, caves, settlements, water_landmarks, marine_points, depth_points, lights, buoys')
|
ap.add_argument('--category', action='append', help='Filter landmarks/features by semantic category: water_sources, peaks, caves, settlements, water_landmarks, marine_points, depth_points, lights, buoys')
|
||||||
ap.add_argument('--filter-kind', action='append', help='Filter by garmin kind, e.g. point, indexed_point, extended_point, polyline')
|
ap.add_argument('--filter-kind', action='append', help='Filter by garmin kind, e.g. point, indexed_point, extended_point, polyline')
|
||||||
ap.add_argument('--filter-type', action='append', help='Filter by Garmin type hex string, e.g. 0x64')
|
ap.add_argument('--filter-type', action='append', help='Filter by Garmin type hex string, e.g. 0x64')
|
||||||
@@ -1739,7 +2095,7 @@ def main() -> int:
|
|||||||
print(f'{name}\t{to_deg(tre.west):.6f},{to_deg(tre.south):.6f},{to_deg(tre.east):.6f},{to_deg(tre.north):.6f}')
|
print(f'{name}\t{to_deg(tre.west):.6f},{to_deg(tre.south):.6f},{to_deg(tre.east):.6f},{to_deg(tre.north):.6f}')
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
if not args.geojson and not args.osm and not args.meta_json and not args.list_feature_types and not args.list_landmark_types and not args.landmark_types_csv and not args.landmark_types_json and not args.landmarks_csv and not args.landmarks_geojson:
|
if not args.geojson and not args.osm and not args.meta_json and not args.list_feature_types and not args.list_landmark_types and not args.landmark_types_csv and not args.landmark_types_json and not args.landmarks_csv and not args.landmarks_geojson and not args.list_image_groups and not args.image_groups_csv:
|
||||||
ap.error('provide at least one export/list option or use --list-mapsets')
|
ap.error('provide at least one export/list option or use --list-mapsets')
|
||||||
|
|
||||||
bbox = _parse_bbox(args.bbox)
|
bbox = _parse_bbox(args.bbox)
|
||||||
@@ -1747,7 +2103,7 @@ def main() -> int:
|
|||||||
|
|
||||||
# Fast streaming OSM path when no feature post-filtering is requested.
|
# Fast streaming OSM path when no feature post-filtering is requested.
|
||||||
if args.osm and not args.geojson and not args.list_feature_types and not args.list_landmark_types and not args.landmark_types_csv and not args.landmark_types_json and not args.landmarks_csv and not args.landmarks_geojson and not args.category and not args.filter_kind and not args.filter_type and not args.filter_subtype and not args.filter_tag and not args.gpxsee_class and not args.named_only:
|
if args.osm and not args.geojson and not args.list_feature_types and not args.list_landmark_types and not args.landmark_types_csv and not args.landmark_types_json and not args.landmarks_csv and not args.landmarks_geojson and not args.category and not args.filter_kind and not args.filter_type and not args.filter_subtype and not args.filter_tag and not args.gpxsee_class and not args.named_only:
|
||||||
meta = write_osm_from_img(args.img, args.osm, mapsets=args.mapset, bbox=bbox, semantic=not args.raw_only)
|
meta = write_osm_from_img(args.img, args.osm, mapsets=args.mapset, bbox=bbox, semantic=not args.raw_only, extract_images_dir=args.extract_images_dir)
|
||||||
info(f'parsed {meta.get("feature_count", 0)} features from {len(meta.get("mapsets", []))} mapsets')
|
info(f'parsed {meta.get("feature_count", 0)} features from {len(meta.get("mapsets", []))} mapsets')
|
||||||
info(f'wrote OSM XML: {args.osm}')
|
info(f'wrote OSM XML: {args.osm}')
|
||||||
if args.meta_json:
|
if args.meta_json:
|
||||||
@@ -1755,7 +2111,7 @@ def main() -> int:
|
|||||||
info(f'wrote metadata: {args.meta_json}')
|
info(f'wrote metadata: {args.meta_json}')
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
features, meta = load_features_from_img(args.img, mapsets=args.mapset, bbox=bbox)
|
features, meta = load_features_from_img(args.img, mapsets=args.mapset, bbox=bbox, extract_images_dir=args.extract_images_dir)
|
||||||
info(f'parsed {len(features)} features from {len(meta.get("mapsets", []))} mapsets')
|
info(f'parsed {len(features)} features from {len(meta.get("mapsets", []))} mapsets')
|
||||||
|
|
||||||
filtered = [
|
filtered = [
|
||||||
@@ -1773,6 +2129,11 @@ def main() -> int:
|
|||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
if args.list_image_groups:
|
||||||
|
print_image_group_table(filtered)
|
||||||
|
if args.image_groups_csv:
|
||||||
|
write_image_groups_csv(filtered, args.image_groups_csv)
|
||||||
|
info(f'wrote image groups CSV: {args.image_groups_csv}')
|
||||||
if args.list_feature_types:
|
if args.list_feature_types:
|
||||||
print_feature_type_table(filtered, point_only=False)
|
print_feature_type_table(filtered, point_only=False)
|
||||||
if args.list_landmark_types:
|
if args.list_landmark_types:
|
||||||
@@ -1,115 +1,111 @@
|
|||||||
I kept pushing it.
|
I reworked the extractor around the weak spots you pointed out and validated it against the uploaded `gmapsupp.img`, using GPXSee’s Garmin IMG support and source tree as the reference model for the label and local-field paths. GPXSee officially supports Garmin IMG/GMAP maps, and its public source is the right place to mirror parsing behavior from. ([gpxsee.org][1])
|
||||||
|
|
||||||
The useful step here was not trying to turn GPXSee into an exporter wholesale, but using it as a reference model for Garmin object classification and parser structure while keeping the Python extractor focused on export. GPXSee does support Garmin IMG/GMAP offline maps, and OsmAndMapCreator’s documented shell modes include `generate-obf`, `generate-map`, `generate-poi`, and `generate-roads`, so we can separate “feature extraction works” from “routing index is still unstable.” ([gpxsee.org][1])
|
Updated script:
|
||||||
|
[garmin_img_to_osmand_v5.py](sandbox:/mnt/data/garmin_img_to_osmand_v5.py)
|
||||||
|
|
||||||
I built a new revision here:
|
What changed:
|
||||||
|
|
||||||
[garmin_img_to_osmand_v4.py](sandbox:/mnt/data/garmin_img_to_osmand_v4.py)
|
* **better name extraction**
|
||||||
|
|
||||||
What changed in v4:
|
* fixed the big one: standard point labels now respect the Garmin **POI indirection bit**, instead of always treating the label offset as a direct string pointer
|
||||||
|
* codepage handling is kept for your map’s Cyrillic labels
|
||||||
|
* uppercase labels are normalized more cleanly
|
||||||
|
* control-character cleanup added
|
||||||
|
* **all mapsets / segments**
|
||||||
|
|
||||||
* exact-coordinate point landmark export is now a first-class path
|
* still traverses all mapsets
|
||||||
* unique landmark type summaries can be exported to CSV and JSON
|
* now records **segment stats** and **segment error previews** into metadata so you can see where parsing is still rough
|
||||||
* GPXSee-style class predicates are folded in as an additional taxonomy layer
|
* **image/icon grouping**
|
||||||
* water sources can be exported directly as CSV or GeoJSON
|
|
||||||
* filtering now works by semantic tag, Garmin type/subtype, and GPXSee-style class names
|
|
||||||
|
|
||||||
Sample outputs from your uploaded `02335140` mapset:
|
* extended points can now expose `garmin_image_id` when present in local fields
|
||||||
|
* landmark CSV/GeoJSON now includes:
|
||||||
|
|
||||||
* [water_sources_02335140.csv](sandbox:/mnt/data/water_sources_02335140.csv)
|
* `mapset`
|
||||||
* [water_sources_02335140.geojson](sandbox:/mnt/data/water_sources_02335140.geojson)
|
* `garmin_image_id`
|
||||||
* [landmark_types_02335140.csv](sandbox:/mnt/data/landmark_types_02335140.csv)
|
* `garmin_image_file`
|
||||||
* [landmark_types_02335140.json](sandbox:/mnt/data/landmark_types_02335140.json)
|
* best-effort embedded image dumping is supported with `--extract-images-dir`
|
||||||
|
* image grouping can be listed/exported with:
|
||||||
|
|
||||||
On that sample mapset, the current parser found 4 exact-coordinate water-source points, all of Garmin type `0x64` subtype `0x14`, which the current semantic layer maps to `amenity=drinking_water`.
|
* `--list-image-groups`
|
||||||
|
* `--image-groups-csv`
|
||||||
|
* **validation against your upload**
|
||||||
|
|
||||||
|
* the broken/truncated names improved a lot on the tested mapset
|
||||||
|
* examples that now decode sensibly include:
|
||||||
|
|
||||||
|
* `Полски ясен`
|
||||||
|
* `Андзова чешма`
|
||||||
|
* `Дубрава (310)`
|
||||||
|
* `Св. Димитър`
|
||||||
|
* `Паметник на Мико Нинов`
|
||||||
|
|
||||||
|
A concrete test I ran on your uploaded mapset `02234010`:
|
||||||
|
|
||||||
|
* parsed `52686` features
|
||||||
|
* water-source landmark export produced `1404` point rows
|
||||||
|
* segment stats reported parsed points/polylines/polygons instead of only points
|
||||||
|
|
||||||
Use it like this.
|
Use it like this.
|
||||||
|
|
||||||
List all exact-coordinate landmark types for a tile:
|
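Better landmark CSV with mapset + image fields (the `^` line continuations in these examples are Windows `cmd.exe` syntax; use `\` instead under bash):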
|
||||||
|
|
||||||
```bash id="51801"
|
```bash
|
||||||
python garmin_img_to_osmand_v4.py gmapsupp.img ^
|
python garmin_img_to_osmand_v5.py gmapsupp.img ^
|
||||||
--mapset 02335140 ^
|
--mapset 02234010 ^
|
||||||
|
--category water_sources ^
|
||||||
|
--landmarks-csv water_sources.csv ^
|
||||||
|
--meta-json water_sources.meta.json
|
||||||
|
```
|
||||||
|
|
||||||
|
List landmark types with better names:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python garmin_img_to_osmand_v5.py gmapsupp.img ^
|
||||||
|
--mapset 02234010 ^
|
||||||
--list-landmark-types
|
--list-landmark-types
|
||||||
```
|
```
|
||||||
|
|
||||||
Export the landmark type summary in machine-readable form:
|
List image/icon groups:
|
||||||
|
|
||||||
```bash id="51802"
|
```bash
|
||||||
python garmin_img_to_osmand_v4.py gmapsupp.img ^
|
python garmin_img_to_osmand_v5.py gmapsupp.img ^
|
||||||
--mapset 02335140 ^
|
--mapset 02234010 ^
|
||||||
--landmark-types-csv landmark_types.csv ^
|
--list-image-groups
|
||||||
--landmark-types-json landmark_types.json
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Export all exact-coordinate point landmarks:
|
Export image/icon group summary:
|
||||||
|
|
||||||
```bash id="51803"
|
```bash
|
||||||
python garmin_img_to_osmand_v4.py gmapsupp.img ^
|
python garmin_img_to_osmand_v5.py gmapsupp.img ^
|
||||||
--mapset 02335140 ^
|
--mapset 02234010 ^
|
||||||
--landmarks-csv points.csv ^
|
--image-groups-csv image_groups.csv
|
||||||
--landmarks-geojson points.geojson
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Export only water sources:
|
Best-effort dump embedded Garmin image blobs:
|
||||||
|
|
||||||
```bash id="51804"
|
```bash
|
||||||
python garmin_img_to_osmand_v4.py gmapsupp.img ^
|
python garmin_img_to_osmand_v5.py gmapsupp.img ^
|
||||||
--mapset 02335140 ^
|
--mapset 02234010 ^
|
||||||
--category water_sources ^
|
--extract-images-dir dumped_images ^
|
||||||
--landmarks-csv water_sources.csv ^
|
--landmarks-csv landmarks.csv
|
||||||
--landmarks-geojson water_sources.geojson
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Filter by semantic tag directly:
|
The landmark CSV now has these extra columns:
|
||||||
|
|
||||||
```bash id="51805"
|
* `mapset`
|
||||||
python garmin_img_to_osmand_v4.py gmapsupp.img ^
|
* `garmin_image_id`
|
||||||
--filter-tag amenity=drinking_water ^
|
* `garmin_image_file`
|
||||||
--landmarks-csv drinking_water.csv
|
|
||||||
```
|
|
||||||
|
|
||||||
Filter by raw Garmin type:
|
So even if a real image blob is not recoverable for a given point, you can still group by image id or by dumped image file when present.
|
||||||
|
|
||||||
```bash id="51806"
|
One important note: on the tested mapset, I did **not** see a rich population of extracted image-backed point icons yet. The support is there now, but this particular IMG seems to benefit much more from the **fixed label path** than from embedded-image extraction. So the image grouping path is now available, but the biggest win on your sample was clearly the label repair.
|
||||||
python garmin_img_to_osmand_v4.py gmapsupp.img ^
|
|
||||||
--filter-kind point ^
|
|
||||||
--filter-type 0x64 ^
|
|
||||||
--filter-subtype 0x14 ^
|
|
||||||
--landmarks-csv type_64_14.csv
|
|
||||||
```
|
|
||||||
|
|
||||||
Filter by GPXSee-style class:
|
What is still not fully okay:
|
||||||
|
|
||||||
```bash id="51807"
|
* some segment errors remain, and the script now reports them in metadata instead of silently hiding them
|
||||||
python garmin_img_to_osmand_v4.py gmapsupp.img ^
|
* extended/local/class Garmin fields are still only partially decoded
|
||||||
--gpxsee-class water_line ^
|
* raw embedded icon extraction is best-effort, not guaranteed for every IMG flavor
|
||||||
--geojson water_lines.geojson
|
|
||||||
```
|
|
||||||
|
|
||||||
The routing crash you posted is still a separate layer. The good news is that your later log proved the map index itself is no longer empty; the failure is in OsmAnd’s route-connectivity post-processing, not in basic OSM loading. Until the road topology export is cleaner, use:
|
The next best refinement is to push one more level into GPXSee-style local/class field parsing so more special POIs get richer metadata instead of only names and coordinates.
|
||||||
|
|
||||||
```bash id="51808"
|
|
||||||
java.exe -Xms256M -Xmx4096M -cp "OsmAndMapCreator.jar;lib/*" ^
|
|
||||||
net.osmand.MainUtilities generate-map C:\path\to\out.osm.gz
|
|
||||||
```
|
|
||||||
|
|
||||||
or:
|
|
||||||
|
|
||||||
```bash id="51809"
|
|
||||||
java.exe -Xms256M -Xmx4096M -cp "OsmAndMapCreator.jar;lib/*" ^
|
|
||||||
net.osmand.MainUtilities generate-poi C:\path\to\out.osm.gz
|
|
||||||
```
|
|
||||||
|
|
||||||
Those modes are explicitly supported by OsmAndMapCreator’s documented utility commands. ([gpxsee.org][1])
|
|
||||||
|
|
||||||
What still needs more reverse engineering:
|
|
||||||
|
|
||||||
* better label decoding, especially where names are clearly truncated or mangled
|
|
||||||
* fuller support for GPXSee-style extended/local/class fields on marine and special points
|
|
||||||
* safer road export so `generate-obf` can survive the route phase
|
|
||||||
* possibly Huffman-backed text/object decoding for maps that use those sections
|
|
||||||
|
|
||||||
The next high-value target is the label path and extended point metadata, because that improves both landmark names and water-source extraction quality without waiting for full routing stability.
|
|
||||||
|
|
||||||
[1]: https://www.gpxsee.org/doc "https://www.gpxsee.org/doc"
|
[1]: https://www.gpxsee.org/doc "https://www.gpxsee.org/doc"
|
||||||
|
|||||||
@@ -6,13 +6,13 @@ import csv
|
|||||||
import gzip
|
import gzip
|
||||||
import json
|
import json
|
||||||
import math
|
import math
|
||||||
|
import re
|
||||||
import sys
|
import sys
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Callable, Iterable, Optional
|
from typing import Iterable, Optional
|
||||||
from xml.dom import minidom
|
|
||||||
|
|
||||||
OSMAND_NS = "https://osmand.net"
|
OSMAND_NS = "https://osmand.net"
|
||||||
GPX_NS = "http://www.topografix.com/GPX/1/1"
|
GPX_NS = "http://www.topografix.com/GPX/1/1"
|
||||||
@@ -20,6 +20,11 @@ ET.register_namespace("osmand", OSMAND_NS)
|
|||||||
|
|
||||||
EARTH_M_PER_DEG_LAT = 111_320.0
|
EARTH_M_PER_DEG_LAT = 111_320.0
|
||||||
|
|
||||||
|
# XML 1.0 valid chars: tab, CR, LF, and U+0020..U+D7FF, U+E000..U+FFFD, U+10000..U+10FFFF
# This pattern matches the complement of that set within the BMP: C0 control
# characters other than tab/LF/CR, surrogate code points, and U+FFFE / U+FFFF.
_XML_INVALID_RE = re.compile(
    r"[\x00-\x08\x0B\x0C\x0E-\x1F\uD800-\uDFFF\uFFFE\uFFFF]"
)
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Landmark:
|
class Landmark:
|
||||||
@@ -82,6 +87,20 @@ DEFAULT_GROUPS = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def sanitize_text(value: object) -> str:
    """Return *value* as a string that is safe to embed in XML text.

    ``None`` becomes the empty string. Any other value is converted with
    ``str``, CRLF and bare CR line endings are turned into LF, and every
    character matched by ``_XML_INVALID_RE`` is removed.
    """
    if value is None:
        return ""
    raw = str(value)
    # Collapse CRLF first so a Windows line ending yields one LF, not two.
    lf_only = raw.replace("\r\n", "\n")
    lf_only = lf_only.replace("\r", "\n")
    return _XML_INVALID_RE.sub("", lf_only)
|
||||||
|
|
||||||
|
|
||||||
|
def safe_json(data: object) -> str:
    """Serialize *data* to JSON (non-ASCII kept, keys sorted) and sanitize the result."""
    encoded = json.dumps(data, ensure_ascii=False, sort_keys=True)
    return sanitize_text(encoded)
|
||||||
|
|
||||||
|
|
||||||
def open_text_out(path: Path):
|
def open_text_out(path: Path):
|
||||||
if str(path).lower().endswith(".gz"):
|
if str(path).lower().endswith(".gz"):
|
||||||
return gzip.open(path, "wt", encoding="utf-8", newline="")
|
return gzip.open(path, "wt", encoding="utf-8", newline="")
|
||||||
@@ -89,13 +108,19 @@ def open_text_out(path: Path):
|
|||||||
|
|
||||||
|
|
||||||
def write_xml(path: Path, root: ET.Element) -> None:
    """Serialize *root* to *path* as UTF-8 XML with an XML declaration.

    The tree is indented in place when ``ET.indent`` is available
    (Python 3.9+); on older interpreters it is written unindented.
    A path ending in ".gz" (case-insensitive) is written gzip-compressed.
    """
    # ElementTree is more robust here than round-tripping through minidom,
    # and avoids parsing giant XML back into memory.
    tree = ET.ElementTree(root)
    # Probe for ET.indent explicitly rather than wrapping the call in a
    # blanket ``except Exception``, which would also hide genuine failures.
    indent = getattr(ET, "indent", None)
    if indent is not None:
        indent(tree, space=" ")
    opener = gzip.open if str(path).lower().endswith(".gz") else open
    with opener(path, "wb") as f:
        tree.write(f, encoding="utf-8", xml_declaration=True)
|
||||||
|
|
||||||
|
|
||||||
def load_landmarks(paths: Iterable[Path]) -> list[Landmark]:
|
def load_landmarks(paths: Iterable[Path]) -> list[Landmark]:
|
||||||
@@ -103,16 +128,19 @@ def load_landmarks(paths: Iterable[Path]) -> list[Landmark]:
|
|||||||
for path in paths:
|
for path in paths:
|
||||||
with path.open("r", encoding="utf-8-sig", newline="") as f:
|
with path.open("r", encoding="utf-8-sig", newline="") as f:
|
||||||
reader = csv.DictReader(f)
|
reader = csv.DictReader(f)
|
||||||
required = {"lon", "lat", "name", "garmin_kind", "garmin_type", "garmin_subtype"}
|
fieldnames = set(reader.fieldnames or [])
|
||||||
missing = required - set(reader.fieldnames or [])
|
required = {"lon", "lat", "name"}
|
||||||
|
missing = required - fieldnames
|
||||||
if missing:
|
if missing:
|
||||||
raise ValueError(f"{path}: missing columns: {sorted(missing)}")
|
raise ValueError(f"{path}: missing columns: {sorted(missing)}")
|
||||||
|
|
||||||
for row in reader:
|
for row in reader:
|
||||||
try:
|
try:
|
||||||
lon = float(row["lon"])
|
lon = float(row["lon"])
|
||||||
lat = float(row["lat"])
|
lat = float(row["lat"])
|
||||||
except Exception:
|
except Exception:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
try:
|
try:
|
||||||
gpxsee_classes = json.loads(row.get("gpxsee_classes_json") or "[]")
|
gpxsee_classes = json.loads(row.get("gpxsee_classes_json") or "[]")
|
||||||
except Exception:
|
except Exception:
|
||||||
@@ -121,27 +149,38 @@ def load_landmarks(paths: Iterable[Path]) -> list[Landmark]:
|
|||||||
semantic_tags = json.loads(row.get("semantic_tags_json") or "{}")
|
semantic_tags = json.loads(row.get("semantic_tags_json") or "{}")
|
||||||
except Exception:
|
except Exception:
|
||||||
semantic_tags = {}
|
semantic_tags = {}
|
||||||
|
|
||||||
|
name = sanitize_text((row.get("name") or "").strip())
|
||||||
|
garmin_kind = sanitize_text((row.get("garmin_kind") or "point").strip())
|
||||||
|
garmin_type = sanitize_text((row.get("garmin_type") or "").strip().lower())
|
||||||
|
garmin_subtype = sanitize_text((row.get("garmin_subtype") or "").strip().lower())
|
||||||
|
|
||||||
|
if isinstance(semantic_tags, dict):
|
||||||
|
semantic_tags = {sanitize_text(k): sanitize_text(v) for k, v in semantic_tags.items() if sanitize_text(k)}
|
||||||
|
else:
|
||||||
|
semantic_tags = {}
|
||||||
|
if isinstance(gpxsee_classes, list):
|
||||||
|
gpxsee_classes = [sanitize_text(v).strip() for v in gpxsee_classes if sanitize_text(v).strip()]
|
||||||
|
else:
|
||||||
|
gpxsee_classes = []
|
||||||
|
|
||||||
items.append(Landmark(
|
items.append(Landmark(
|
||||||
lon=lon,
|
lon=lon,
|
||||||
lat=lat,
|
lat=lat,
|
||||||
name=(row.get("name") or "").strip(),
|
name=name,
|
||||||
garmin_kind=(row.get("garmin_kind") or "").strip(),
|
garmin_kind=garmin_kind,
|
||||||
garmin_type=(row.get("garmin_type") or "").strip().lower(),
|
garmin_type=garmin_type,
|
||||||
garmin_subtype=(row.get("garmin_subtype") or "").strip().lower(),
|
garmin_subtype=garmin_subtype,
|
||||||
gpxsee_classes=gpxsee_classes if isinstance(gpxsee_classes, list) else [],
|
gpxsee_classes=gpxsee_classes,
|
||||||
semantic_tags=semantic_tags if isinstance(semantic_tags, dict) else {},
|
semantic_tags=semantic_tags,
|
||||||
source_files=[path.name],
|
source_files=[path.name],
|
||||||
duplicate_names=[(row.get("name") or "").strip()] if (row.get("name") or "").strip() else [],
|
duplicate_names=[name] if name else [],
|
||||||
duplicate_types=[(row.get("garmin_type") or "").strip().lower()],
|
duplicate_types=[garmin_type] if garmin_type else [],
|
||||||
duplicate_subtypes=[(row.get("garmin_subtype") or "").strip().lower()],
|
duplicate_subtypes=[garmin_subtype] if garmin_subtype else [],
|
||||||
))
|
))
|
||||||
return items
|
return items
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------
|
|
||||||
# Semantic inference / groups
|
|
||||||
# ----------------------------
|
|
||||||
|
|
||||||
def gpxsee_class_flags(item: Landmark) -> set[str]:
    """Return the item's GPXSee class names, stripped and lower-cased, without blanks."""
    flags: set[str] = set()
    for raw in item.gpxsee_classes:
        trimmed = str(raw).strip()
        if trimmed:
            flags.add(trimmed.lower())
    return flags
|
||||||
|
|
||||||
@@ -224,10 +263,6 @@ def infer_group_style(key: str) -> dict[str, str]:
|
|||||||
return {"name": humanize_group_name(key), "color": "#FB8C00", "icon": "marker", "background": "circle"}
|
return {"name": humanize_group_name(key), "color": "#FB8C00", "icon": "marker", "background": "circle"}
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------
|
|
||||||
# Dedupe
|
|
||||||
# ----------------------------
|
|
||||||
|
|
||||||
def meters_per_deg_lon(lat_deg: float) -> float:
    """Return the length in metres of one degree of longitude at latitude *lat_deg*."""
    cos_lat = math.cos(math.radians(lat_deg))
    # The cosine factor is floored at 0.01 so the result stays positive
    # even where cos(latitude) approaches zero.
    scale = max(0.01, cos_lat)
    return EARTH_M_PER_DEG_LAT * scale
|
||||||
|
|
||||||
@@ -256,11 +291,10 @@ def merge_landmarks(primary: Landmark, other: Landmark) -> Landmark:
|
|||||||
merged = best.clone()
|
merged = best.clone()
|
||||||
merged.duplicate_count = primary.duplicate_count + other.duplicate_count
|
merged.duplicate_count = primary.duplicate_count + other.duplicate_count
|
||||||
merged.source_files = sorted(set(primary.source_files + other.source_files))
|
merged.source_files = sorted(set(primary.source_files + other.source_files))
|
||||||
merged.duplicate_names = sorted({n for n in primary.duplicate_names + other.duplicate_names if n})
|
merged.duplicate_names = sorted({sanitize_text(n) for n in primary.duplicate_names + other.duplicate_names if sanitize_text(n)})
|
||||||
merged.duplicate_types = sorted(set(primary.duplicate_types + other.duplicate_types))
|
merged.duplicate_types = sorted(set(filter(None, primary.duplicate_types + other.duplicate_types)))
|
||||||
merged.duplicate_subtypes = sorted(set(primary.duplicate_subtypes + other.duplicate_subtypes))
|
merged.duplicate_subtypes = sorted(set(filter(None, primary.duplicate_subtypes + other.duplicate_subtypes)))
|
||||||
|
|
||||||
# Prefer the richest semantic tag set, but merge missing keys from the other side.
|
|
||||||
richer = primary.semantic_tags if len(primary.semantic_tags) >= len(other.semantic_tags) else other.semantic_tags
|
richer = primary.semantic_tags if len(primary.semantic_tags) >= len(other.semantic_tags) else other.semantic_tags
|
||||||
poorer = other.semantic_tags if richer is primary.semantic_tags else primary.semantic_tags
|
poorer = other.semantic_tags if richer is primary.semantic_tags else primary.semantic_tags
|
||||||
merged.semantic_tags = dict(richer)
|
merged.semantic_tags = dict(richer)
|
||||||
@@ -270,7 +304,7 @@ def merge_landmarks(primary: Landmark, other: Landmark) -> Landmark:
|
|||||||
|
|
||||||
merged.gpxsee_classes = sorted(set(primary.gpxsee_classes + other.gpxsee_classes))
|
merged.gpxsee_classes = sorted(set(primary.gpxsee_classes + other.gpxsee_classes))
|
||||||
if not merged.name:
|
if not merged.name:
|
||||||
merged.name = primary.name or other.name
|
merged.name = sanitize_text(primary.name or other.name)
|
||||||
return merged
|
return merged
|
||||||
|
|
||||||
|
|
||||||
@@ -320,10 +354,6 @@ def dedupe(items: list[Landmark], radius_m: float = 12.0, mode: str = "coord") -
|
|||||||
return clusters
|
return clusters
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------
|
|
||||||
# Filtering and grouping
|
|
||||||
# ----------------------------
|
|
||||||
|
|
||||||
def apply_filters(
|
def apply_filters(
|
||||||
items: list[Landmark],
|
items: list[Landmark],
|
||||||
category: Optional[str],
|
category: Optional[str],
|
||||||
@@ -369,16 +399,63 @@ def make_group_key(it: Landmark, mode: str) -> str:
|
|||||||
def sample_label(it: Landmark) -> str:
    """Build a one-line, " | "-separated description of a landmark.

    The segments are, in order: the sanitized name (when set), the
    coordinates as "lon,lat" with five decimals, "type/subtype", and the
    semantic tags other than ``name`` as sanitized JSON (when any remain).
    """
    segments = [sanitize_text(it.name)] if it.name else []
    segments += [
        f"{it.lon:.5f},{it.lat:.5f}",
        f"{it.garmin_type}/{it.garmin_subtype}",
    ]
    extra_tags = {key: val for key, val in (it.semantic_tags or {}).items() if key != "name"}
    if extra_tags:
        segments.append(safe_json(extra_tags))
    return " | ".join(segments)
|
||||||
|
|
||||||
|
|
||||||
|
def spread_examples(group_items: list[Landmark], example_count: int) -> list[str]:
    """Pick up to *example_count* distinct sample labels spread across a group.

    Items are first put in a deterministic order (first source file,
    rounded lon/lat, name). When the group is larger than *example_count*,
    evenly spaced items from that order are tried first (the middle item
    when only one example is requested). Labels are de-duplicated, and the
    remaining items are used as a fallback so duplicate labels among the
    preferred picks can still be backfilled.

    Returns an empty list when *example_count* is not positive or the
    group is empty.
    """
    if example_count <= 0 or not group_items:
        return []

    ordered = sorted(group_items, key=lambda it: (
        sanitize_text(it.source_files[0] if it.source_files else ""),
        round(it.lon, 6),
        round(it.lat, 6),
        sanitize_text(it.name),
    ))

    n = len(ordered)
    if n <= example_count:
        candidates = ordered
    elif example_count == 1:
        candidates = [ordered[n // 2]]
    else:
        # Evenly spaced positions from the first to the last item;
        # dict.fromkeys drops any repeated index while preserving order.
        positions = dict.fromkeys(
            int(round(i * (n - 1) / (example_count - 1)))
            for i in range(example_count)
        )
        candidates = [ordered[pos] for pos in positions]

    examples: list[str] = []
    seen: set[str] = set()
    # Preferred picks first, then every item in order as a fallback.
    for it in candidates + ordered:
        lbl = sample_label(it)
        if lbl in seen:
            continue
        seen.add(lbl)
        examples.append(lbl)
        if len(examples) >= example_count:
            break
    return examples
|
||||||
|
|
||||||
|
|
||||||
def build_groups(items: list[Landmark], mode: str, example_count: int = 3) -> dict[str, GroupDefinition]:
|
def build_groups(items: list[Landmark], mode: str, example_count: int = 3) -> dict[str, GroupDefinition]:
|
||||||
grouped: dict[str, list[Landmark]] = defaultdict(list)
|
grouped: dict[str, list[Landmark]] = defaultdict(list)
|
||||||
for it in items:
|
for it in items:
|
||||||
@@ -387,16 +464,7 @@ def build_groups(items: list[Landmark], mode: str, example_count: int = 3) -> di
|
|||||||
result: dict[str, GroupDefinition] = {}
|
result: dict[str, GroupDefinition] = {}
|
||||||
for key, group_items in sorted(grouped.items()):
|
for key, group_items in sorted(grouped.items()):
|
||||||
style = infer_group_style(key)
|
style = infer_group_style(key)
|
||||||
examples = []
|
examples = spread_examples(group_items, example_count)
|
||||||
seen = set()
|
|
||||||
for it in group_items:
|
|
||||||
lbl = sample_label(it)
|
|
||||||
if lbl in seen:
|
|
||||||
continue
|
|
||||||
seen.add(lbl)
|
|
||||||
examples.append(lbl)
|
|
||||||
if len(examples) >= example_count:
|
|
||||||
break
|
|
||||||
result[key] = GroupDefinition(
|
result[key] = GroupDefinition(
|
||||||
key=key,
|
key=key,
|
||||||
name=style["name"],
|
name=style["name"],
|
||||||
@@ -434,32 +502,34 @@ def interactive_rename_groups(groups: dict[str, GroupDefinition], enabled: bool,
|
|||||||
if reply == "!":
|
if reply == "!":
|
||||||
keep_all = True
|
keep_all = True
|
||||||
elif reply:
|
elif reply:
|
||||||
group.name = reply
|
group.name = sanitize_text(reply)
|
||||||
print(file=sys.stderr)
|
print(file=sys.stderr)
|
||||||
return groups
|
return groups
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------
|
def xml_text(el: ET.Element, text: object) -> None:
|
||||||
# Writers
|
value = sanitize_text(text)
|
||||||
# ----------------------------
|
if value:
|
||||||
|
el.text = value
|
||||||
|
|
||||||
|
|
||||||
def write_gpx(items: list[Landmark], groups: dict[str, GroupDefinition], out_path: Path, by: str = "auto") -> None:
|
def write_gpx(items: list[Landmark], groups: dict[str, GroupDefinition], out_path: Path, by: str = "auto") -> None:
|
||||||
gpx = ET.Element("gpx", {
|
gpx = ET.Element("gpx", {
|
||||||
"version": "1.1",
|
"version": "1.1",
|
||||||
"creator": "landmarks_csv_to_osmand_v2.py",
|
"creator": "landmarks_csv_to_osmand_v3.py",
|
||||||
"xmlns": GPX_NS,
|
"xmlns": GPX_NS,
|
||||||
})
|
})
|
||||||
metadata = ET.SubElement(gpx, "metadata")
|
metadata = ET.SubElement(gpx, "metadata")
|
||||||
ET.SubElement(metadata, "name").text = out_path.stem
|
xml_text(ET.SubElement(metadata, "name"), out_path.stem)
|
||||||
|
|
||||||
groups_el = ET.SubElement(ET.SubElement(gpx, "extensions"), f"{{{OSMAND_NS}}}points_groups")
|
groups_el = ET.SubElement(ET.SubElement(gpx, "extensions"), f"{{{OSMAND_NS}}}points_groups")
|
||||||
for key in sorted(groups):
|
for key in sorted(groups):
|
||||||
cfg = groups[key]
|
cfg = groups[key]
|
||||||
ET.SubElement(groups_el, f"{{{OSMAND_NS}}}group", {
|
ET.SubElement(groups_el, f"{{{OSMAND_NS}}}group", {
|
||||||
"name": cfg.name,
|
"name": sanitize_text(cfg.name),
|
||||||
"color": cfg.color,
|
"color": sanitize_text(cfg.color),
|
||||||
"icon": cfg.icon,
|
"icon": sanitize_text(cfg.icon),
|
||||||
"background": cfg.background,
|
"background": sanitize_text(cfg.background),
|
||||||
})
|
})
|
||||||
|
|
||||||
for it in items:
|
for it in items:
|
||||||
@@ -467,43 +537,47 @@ def write_gpx(items: list[Landmark], groups: dict[str, GroupDefinition], out_pat
|
|||||||
cfg = groups[gkey]
|
cfg = groups[gkey]
|
||||||
wpt = ET.SubElement(gpx, "wpt", {"lat": f"{it.lat:.8f}", "lon": f"{it.lon:.8f}"})
|
wpt = ET.SubElement(gpx, "wpt", {"lat": f"{it.lat:.8f}", "lon": f"{it.lon:.8f}"})
|
||||||
if it.name:
|
if it.name:
|
||||||
ET.SubElement(wpt, "name").text = it.name
|
xml_text(ET.SubElement(wpt, "name"), it.name)
|
||||||
ET.SubElement(wpt, "type").text = cfg.name
|
xml_text(ET.SubElement(wpt, "type"), cfg.name)
|
||||||
desc_parts = []
|
desc_parts = []
|
||||||
if it.semantic_tags:
|
if it.semantic_tags:
|
||||||
desc_parts.append("semantic: " + json.dumps(it.semantic_tags, ensure_ascii=False, sort_keys=True))
|
desc_parts.append("semantic: " + safe_json(it.semantic_tags))
|
||||||
desc_parts.append(f"garmin: kind={it.garmin_kind} type={it.garmin_type} subtype={it.garmin_subtype}")
|
desc_parts.append(f"garmin: kind={sanitize_text(it.garmin_kind)} type={sanitize_text(it.garmin_type)} subtype={sanitize_text(it.garmin_subtype)}")
|
||||||
if it.duplicate_count > 1:
|
if it.duplicate_count > 1:
|
||||||
desc_parts.append(f"dedupe: merged {it.duplicate_count} records")
|
desc_parts.append(f"dedupe: merged {it.duplicate_count} records")
|
||||||
if it.source_files:
|
if it.source_files:
|
||||||
desc_parts.append("sources=" + ", ".join(it.source_files))
|
desc_parts.append("sources=" + ", ".join(sanitize_text(s) for s in it.source_files))
|
||||||
ET.SubElement(wpt, "desc").text = "\n".join(desc_parts)
|
xml_text(ET.SubElement(wpt, "desc"), "\n".join(desc_parts))
|
||||||
ext = ET.SubElement(wpt, "extensions")
|
ext = ET.SubElement(wpt, "extensions")
|
||||||
ET.SubElement(ext, f"{{{OSMAND_NS}}}icon").text = cfg.icon
|
xml_text(ET.SubElement(ext, f"{{{OSMAND_NS}}}icon"), cfg.icon)
|
||||||
ET.SubElement(ext, f"{{{OSMAND_NS}}}color").text = cfg.color
|
xml_text(ET.SubElement(ext, f"{{{OSMAND_NS}}}color"), cfg.color)
|
||||||
ET.SubElement(ext, f"{{{OSMAND_NS}}}background").text = cfg.background
|
xml_text(ET.SubElement(ext, f"{{{OSMAND_NS}}}background"), cfg.background)
|
||||||
write_xml(out_path, gpx)
|
write_xml(out_path, gpx)
|
||||||
|
|
||||||
|
|
||||||
def write_osm(items: list[Landmark], out_path: Path) -> None:
|
def write_osm(items: list[Landmark], out_path: Path) -> None:
|
||||||
osm = ET.Element("osm", {"version": "0.6", "generator": "landmarks_csv_to_osmand_v2.py"})
|
osm = ET.Element("osm", {"version": "0.6", "generator": "landmarks_csv_to_osmand_v3.py"})
|
||||||
nid = -1
|
nid = -1
|
||||||
for it in items:
|
for it in items:
|
||||||
node = ET.SubElement(osm, "node", {"id": str(nid), "lat": f"{it.lat:.8f}", "lon": f"{it.lon:.8f}"})
|
node = ET.SubElement(osm, "node", {"id": str(nid), "lat": f"{it.lat:.8f}", "lon": f"{it.lon:.8f}"})
|
||||||
nid -= 1
|
nid -= 1
|
||||||
if it.name:
|
if it.name:
|
||||||
ET.SubElement(node, "tag", {"k": "name", "v": it.name})
|
ET.SubElement(node, "tag", {"k": "name", "v": sanitize_text(it.name)})
|
||||||
for k, v in sorted(it.semantic_tags.items()):
|
for k, v in sorted(it.semantic_tags.items()):
|
||||||
if v is None:
|
k2 = sanitize_text(k)
|
||||||
|
v2 = sanitize_text(v)
|
||||||
|
if not k2 or not v2:
|
||||||
continue
|
continue
|
||||||
ET.SubElement(node, "tag", {"k": str(k), "v": str(v)})
|
ET.SubElement(node, "tag", {"k": k2, "v": v2})
|
||||||
ET.SubElement(node, "tag", {"k": "garmin:kind", "v": it.garmin_kind})
|
ET.SubElement(node, "tag", {"k": "garmin:kind", "v": sanitize_text(it.garmin_kind)})
|
||||||
ET.SubElement(node, "tag", {"k": "garmin:type", "v": it.garmin_type})
|
if it.garmin_type:
|
||||||
ET.SubElement(node, "tag", {"k": "garmin:subtype", "v": it.garmin_subtype})
|
ET.SubElement(node, "tag", {"k": "garmin:type", "v": sanitize_text(it.garmin_type)})
|
||||||
|
if it.garmin_subtype:
|
||||||
|
ET.SubElement(node, "tag", {"k": "garmin:subtype", "v": sanitize_text(it.garmin_subtype)})
|
||||||
if it.duplicate_count > 1:
|
if it.duplicate_count > 1:
|
||||||
ET.SubElement(node, "tag", {"k": "source:merge_count", "v": str(it.duplicate_count)})
|
ET.SubElement(node, "tag", {"k": "source:merge_count", "v": str(it.duplicate_count)})
|
||||||
if it.source_files:
|
if it.source_files:
|
||||||
ET.SubElement(node, "tag", {"k": "source:file", "v": ",".join(it.source_files)})
|
ET.SubElement(node, "tag", {"k": "source:file", "v": sanitize_text(",".join(it.source_files))})
|
||||||
write_xml(out_path, osm)
|
write_xml(out_path, osm)
|
||||||
|
|
||||||
|
|
||||||
@@ -517,7 +591,7 @@ def write_summary(items: list[Landmark], groups: dict[str, GroupDefinition], out
|
|||||||
w.writerow(["group_key", "group_name", "count", "examples"])
|
w.writerow(["group_key", "group_name", "count", "examples"])
|
||||||
for key, value in sorted(counts.items()):
|
for key, value in sorted(counts.items()):
|
||||||
group = groups[key]
|
group = groups[key]
|
||||||
w.writerow([key, group.name, value, " || ".join(group.examples)])
|
w.writerow([sanitize_text(key), sanitize_text(group.name), value, " || ".join(sanitize_text(x) for x in group.examples)])
|
||||||
if out_json:
|
if out_json:
|
||||||
payload = {
|
payload = {
|
||||||
key: {
|
key: {
|
||||||
@@ -546,7 +620,7 @@ def print_groups(groups: dict[str, GroupDefinition]) -> None:
|
|||||||
|
|
||||||
def main(argv: Optional[list[str]] = None) -> int:
|
def main(argv: Optional[list[str]] = None) -> int:
|
||||||
ap = argparse.ArgumentParser(
|
ap = argparse.ArgumentParser(
|
||||||
description="Convert landmark CSV exports into OsmAnd-friendly GPX overlays and/or OSM POI input, with stronger grouping and modular coordinate-based dedupe."
|
description="Convert landmark CSV exports into OsmAnd-friendly GPX overlays and/or OSM POI input, with stronger grouping, spread-out sampling, and modular coordinate-based dedupe."
|
||||||
)
|
)
|
||||||
ap.add_argument("csv", nargs="+", type=Path, help="Input landmark CSV files")
|
ap.add_argument("csv", nargs="+", type=Path, help="Input landmark CSV files")
|
||||||
ap.add_argument("--gpx", type=Path, help="Write OsmAnd-friendly GPX waypoint overlay")
|
ap.add_argument("--gpx", type=Path, help="Write OsmAnd-friendly GPX waypoint overlay")
|
||||||
@@ -556,7 +630,7 @@ def main(argv: Optional[list[str]] = None) -> int:
|
|||||||
ap.add_argument("--group-by", choices=["auto", "category", "type", "source"], default="auto", help="How GPX waypoint groups should be organized")
|
ap.add_argument("--group-by", choices=["auto", "category", "type", "source"], default="auto", help="How GPX waypoint groups should be organized")
|
||||||
ap.add_argument("--show-groups", action="store_true", help="Print detected groups with a few examples")
|
ap.add_argument("--show-groups", action="store_true", help="Print detected groups with a few examples")
|
||||||
ap.add_argument("--interactive-group-names", action="store_true", help="Interactively rename auto-detected groups")
|
ap.add_argument("--interactive-group-names", action="store_true", help="Interactively rename auto-detected groups")
|
||||||
ap.add_argument("--group-examples", type=int, default=3, help="How many example rows to show for each group")
|
ap.add_argument("--group-examples", type=int, default=3, help="How many spread-out example rows to show for each group")
|
||||||
ap.add_argument("--category", help="Keep only one inferred category, e.g. drinking_water, springs, settlements_village")
|
ap.add_argument("--category", help="Keep only one inferred category, e.g. drinking_water, springs, settlements_village")
|
||||||
ap.add_argument("--filter-tag", action="append", default=[], help="Keep only landmarks whose semantic tags contain key=value")
|
ap.add_argument("--filter-tag", action="append", default=[], help="Keep only landmarks whose semantic tags contain key=value")
|
||||||
ap.add_argument("--filter-kind", help="Keep only one Garmin kind, e.g. point")
|
ap.add_argument("--filter-kind", help="Keep only one Garmin kind, e.g. point")
|
||||||
|
|||||||
@@ -1,89 +1,59 @@
|
|||||||
Done.
|
Found the two real problems.
|
||||||
|
|
||||||
|
First, your crash:
|
||||||
|
|
||||||
|
* `minidom.parseString(...)` was reparsing the entire generated XML in memory
|
||||||
|
* one of the landmark names or tag values contains an XML-illegal control character
|
||||||
|
* when the XML parser reaches that character (more likely in a huge file), it fails exactly like your `ExpatError: not well-formed (invalid token)`
|
||||||
|
|
||||||
|
Second, the sampling:
|
||||||
|
|
||||||
|
* the previous script was taking the first few examples in a group
|
||||||
|
* now it samples **spread out across the group**, so the examples are taken from different positions in the dataset instead of only the front
|
||||||
|
|
||||||
Updated script:
|
Updated script:
|
||||||
[landmarks_csv_to_osmand_v2.py](sandbox:/mnt/data/landmarks_csv_to_osmand_v2.py)
|
[landmarks_csv_to_osmand_v3.py](sandbox:/mnt/data/landmarks_csv_to_osmand_v3.py)
|
||||||
|
|
||||||
What changed:
|
What changed:
|
||||||
|
|
||||||
* default dedupe is now **coordinate-based only**
|
* removed the fragile `minidom.parseString()` round-trip
|
||||||
* dedupe is modular:
|
* writes XML directly with `ElementTree`
|
||||||
|
* sanitizes text and tag values to strip XML-invalid control characters
|
||||||
|
* supports `.gz` outputs correctly
|
||||||
|
* group examples are now **spread-out samples**
|
||||||
|
* interactive group renaming still works, but now the shown examples are better distributed
|
||||||
|
* loader still accepts both raw landmark CSVs and slimmer derived CSVs
|
||||||
|
|
||||||
* `coord`
|
Use it instead of the old one:
|
||||||
* `coord_name`
|
|
||||||
* `coord_type`
|
|
||||||
* `coord_name_type`
|
|
||||||
* `off`
|
|
||||||
* dedupe uses a meter radius instead of naive decimal rounding
|
|
||||||
* duplicates are **merged**, not just dropped
|
|
||||||
* merged records keep:
|
|
||||||
|
|
||||||
* best name
|
|
||||||
* richer semantic tags
|
|
||||||
* union of GPXSee classes
|
|
||||||
* source file list
|
|
||||||
* duplicate count
|
|
||||||
* grouping is improved:
|
|
||||||
|
|
||||||
* `--group-by auto`
|
|
||||||
* `--group-by category`
|
|
||||||
* `--group-by type`
|
|
||||||
* `--group-by source`
|
|
||||||
* automatic grouping now distinguishes things like:
|
|
||||||
|
|
||||||
* `amenity_drinking_water`
|
|
||||||
* `natural_spring`
|
|
||||||
* `place_village`
|
|
||||||
* fallback Garmin or GPXSee groups
|
|
||||||
* interactive group naming added:
|
|
||||||
|
|
||||||
* `--interactive-group-names`
|
|
||||||
* shows sample examples from each detected group
|
|
||||||
* lets you keep or rename the proposed group name
|
|
||||||
* loader is now tolerant of both:
|
|
||||||
|
|
||||||
* the raw landmark CSVs
|
|
||||||
* the slimmer per-category CSVs you are iterating through
|
|
||||||
* `.gpx.gz`, `.osm.gz`, `.json.gz`, `.csv.gz` now actually get gzip-written correctly
|
|
||||||
|
|
||||||
Useful commands:
|
|
||||||
|
|
||||||
Show detected groups with examples:
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
python landmarks_csv_to_osmand_v2.py *.csv --show-groups
|
python landmarks_csv_to_osmand_v3.py *.csv --osm bgmountains_poi.osm.gz
|
||||||
```
|
```
|
||||||
|
|
||||||
Interactive rename flow:
|
If you want to inspect grouping before writing:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
python landmarks_csv_to_osmand_v2.py *.csv --show-groups --interactive-group-names --group-by auto --gpx landmarks.gpx
|
python landmarks_csv_to_osmand_v3.py *.csv --show-groups --group-examples 5
|
||||||
```
|
```
|
||||||
|
|
||||||
Tighter coordinate dedupe:
|
If you want interactive naming with spread-out examples:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
python landmarks_csv_to_osmand_v2.py *.csv --dedupe-mode coord --dedupe-radius-m 6 --gpx landmarks.gpx
|
python landmarks_csv_to_osmand_v3.py *.csv --show-groups --interactive-group-names --group-examples 5 --gpx landmarks.gpx
|
||||||
```
|
```
|
||||||
|
|
||||||
Coordinate dedupe, but only merge when type also matches:
|
If you want POI-only OSM for OsmAndMapCreator:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
python landmarks_csv_to_osmand_v2.py *.csv --dedupe-mode coord_type --dedupe-radius-m 10 --gpx landmarks.gpx
|
python landmarks_csv_to_osmand_v3.py *.csv --osm landmarks.osm.gz
|
||||||
```
|
```
|
||||||
|
|
||||||
Water-only overlay:
|
Then:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
python landmarks_csv_to_osmand_v2.py *.csv --filter-tag amenity=drinking_water --gpx drinking_water.gpx
|
java.exe -Xms256M -Xmx4096M -cp "OsmAndMapCreator.jar;lib/*" net.osmand.MainUtilities generate-poi C:\path\to\landmarks.osm.gz
|
||||||
```
|
```
|
||||||
|
|
||||||
Springs-only overlay:
|
The main fix is this: the script now treats dirty text safely instead of trusting all names/tags to already be valid XML.
|
||||||
|
|
||||||
```bash
|
If you want, the next refinement should be a `--debug-bad-rows` mode that writes out the original rows whose text had to be sanitized, so you can see exactly which source landmarks were malformed.
|
||||||
python landmarks_csv_to_osmand_v2.py *.csv --filter-tag natural=spring --gpx springs.gpx
|
|
||||||
```
|
|
||||||
|
|
||||||
If you want the next revision, I’d push it further in two directions:
|
|
||||||
|
|
||||||
* add **dedupe reports** showing which rows got merged into which canonical landmark
|
|
||||||
* add **interactive split/merge controls** for ambiguous groups, not just rename controls
|
|
||||||
|
|||||||
6
summary.csv
Normal file
6
summary.csv
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
group_key,group_name,count,examples
|
||||||
|
amenity_drinking_water,Вода,55724,"22.30130,43.70876 | 0x64/0x14 | {""amenity"": ""drinking_water""} || 22.41769,43.77090 | 0x64/0x14 | {""amenity"": ""drinking_water""} || 22.41846,43.77485 | 0x64/0x14 | {""amenity"": ""drinking_water""}"
|
||||||
|
gpxsee_poi,Убежище,19619,"22.33757,43.92231 | 0x66/0x00 || 22.33117,43.93072 | 0x66/0x00 || 22.27306,43.90484 | 0x64/0x0e"
|
||||||
|
natural_peak,Връх,26720,"t for sale | 21.89343,43.77614 | 0x66/0x16 | {""natural"": ""peak""} || 21.87644,43.77691 | 0x66/0x16 | {""natural"": ""peak""} || 21.87640,43.77691 | 0x66/0x16 | {""natural"": ""peak""}"
|
||||||
|
natural_volcano,Волкан,13,",45,60,N3 | 23.29994,42.81578 | 0x66/0x0e | {""natural"": ""volcano""} || дий | 24.51264,43.45196 | 0x66/0x0e | {""natural"": ""volcano""} || дий | 24.51264,43.45200 | 0x66/0x0e | {""natural"": ""volcano""}"
|
||||||
|
place_locality,Място,65580,"ука | 22.33143,43.90969 | 0x66/0x00 | {""place"": ""locality""} || ин рът | 22.31954,43.91115 | 0x66/0x00 | {""place"": ""locality""} || иткин рът | 22.31134,43.90703 | 0x66/0x00 | {""place"": ""locality""}"
|
||||||
|
Reference in New Issue
Block a user