Files
garmin-img-format-parsing/bgtopo_poc/coordinates.py
2026-05-03 21:58:47 +03:00

151 lines
5.6 KiB
Python

from __future__ import annotations
import logging
from pathlib import Path
from typing import Dict, List, Optional
import numpy as np
import pandas as pd
from PIL import Image, ImageDraw
from pyproj import Transformer
from rasterio.windows import Window
from .georef import open_georaster, read_window_rgb
from .utils import ensure_dir
LOG = logging.getLogger(__name__)
def _normalize_coordinate_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with canonical ``lat``, ``lon`` and ``id`` columns.

    Column names are matched case-insensitively and ignoring surrounding
    whitespace. Latitude is taken from the first of ``lat``, ``latitude``,
    ``y`` that exists; longitude from the first of ``lon``, ``lng``,
    ``longitude``, ``x``. Values are coerced to numbers, and rows where either
    coordinate is not numeric are dropped from the result.

    An existing identifier column (``id`` in any letter case) is reused.
    Otherwise ids ``pt_000000``, ``pt_000001``, ... are generated from the row
    position in the input, i.e. before non-numeric rows are dropped.

    Raises:
        ValueError: If no latitude or no longitude column can be found.
    """
    # str() keeps non-string labels (e.g. integer column names) from crashing
    # the lookup; the mapping value is still the original label.
    by_name = {str(label).strip().lower(): label for label in df.columns}

    def first_present(*names):
        for name in names:
            if name in by_name:
                return by_name[name]
        return None

    lat_col = first_present("lat", "latitude", "y")
    lon_col = first_present("lon", "lng", "longitude", "x")
    if lat_col is None or lon_col is None:
        raise ValueError("Coordinate CSV needs lat/lon columns, or latitude/longitude, or y/x.")

    out = df.copy()
    out["lat"] = pd.to_numeric(out[lat_col], errors="coerce")
    out["lon"] = pd.to_numeric(out[lon_col], errors="coerce")
    if "id" not in out.columns:
        id_col = by_name.get("id")
        if id_col is not None:
            # Same case-insensitive matching as for lat/lon: prefer the
            # caller's own identifiers over synthetic ones.
            out["id"] = out[id_col]
        else:
            out["id"] = [f"pt_{i:06d}" for i in range(len(out))]
    return out.dropna(subset=["lat", "lon"])
def load_coordinates(path: str | Path) -> pd.DataFrame:
    """Read the CSV at *path* and return it with normalized coordinate columns.

    The raw table is passed through ``_normalize_coordinate_columns``, which
    raises ``ValueError`` when no latitude/longitude columns can be found.
    """
    raw_table = pd.read_csv(path)
    return _normalize_coordinate_columns(raw_table)
def coord_to_rowcol(ds, lon: float, lat: float, coord_crs: str = "EPSG:4326") -> Optional[tuple[int, int]]:
    """Convert a lon/lat pair given in *coord_crs* to a ``(row, col)`` index of *ds*.

    Returns ``None`` when the dataset has no CRS, or when the reprojection or
    the index lookup raises (the failure is logged at debug level). The result
    is not checked against the dataset bounds here.
    """
    if ds.crs is not None:
        try:
            # always_xy=True: the transformer takes (lon, lat) in that order.
            to_dataset_crs = Transformer.from_crs(coord_crs, ds.crs, always_xy=True)
            east, north = to_dataset_crs.transform(lon, lat)
            px_row, px_col = ds.index(east, north)
            return int(px_row), int(px_col)
        except Exception as exc:  # noqa: BLE001
            LOG.debug("coord_to_rowcol failed: %s", exc)
    return None
def score_coordinates_for_sheet(
    coord_csv: str | Path,
    candidates_csv: str | Path,
    map_path: str | None,
    tif_path: str | None,
    sheet_id: str,
    cfg: Dict,
    out_dir: str | Path,
    coord_crs: str = "EPSG:4326",
) -> Path:
    """Score each input coordinate against the detection candidates of one sheet.

    Every coordinate from *coord_csv* is converted to a raster ``(row, col)``;
    points that cannot be converted or that fall outside the raster are
    skipped. For each remaining point the nearest candidate, by Euclidean
    pixel distance to the candidate's ``cx``/``cy``, is looked up. When it lies
    within ``search_radius_px`` the point is scored as::

        0.55 * candidate_score + 0.45 * max(0, 1 - distance / radius)

    and labelled ``auto_positive`` (score >= ``strong_score``), ``review``
    (score >= ``weak_score``) or ``auto_negative``. Points with no candidate in
    range keep score ``0.0`` and ``auto_negative``.

    Args:
        coord_csv: CSV with the coordinates to score.
        candidates_csv: CSV of candidates; needs ``cx`` and ``cy`` columns when
            non-empty, ``fill_style`` and ``score`` are optional.
        map_path: Passed to ``open_georaster``.
        tif_path: Passed to ``open_georaster``.
        sheet_id: Written to every output row and used in the output file name.
        cfg: Configuration; ``cfg["coordinate_scoring"]`` may define
            ``search_radius_px`` (default 45), ``strong_score`` (default 0.90)
            and ``weak_score`` (default 0.40).
        out_dir: Directory for the output CSV.
        coord_crs: CRS of the input coordinates.

    Returns:
        Path of ``<sheet_id>_coordinate_scores.csv`` inside *out_dir*. The file
        always carries a header row, even when no point was scored.

    Raises:
        KeyError: If *cfg* has no ``"coordinate_scoring"`` section, or a
            non-empty candidates table lacks ``cx``/``cy``.
    """
    out_dir = ensure_dir(out_dir)
    coords = load_coordinates(coord_csv)
    cands = pd.read_csv(candidates_csv)

    # Resolve everything that can fail on bad input *before* the raster is
    # opened, so a broken config or candidates file cannot leak the handle.
    scoring_cfg = cfg["coordinate_scoring"]
    radius = float(scoring_cfg.get("search_radius_px", 45))
    strong_score = float(scoring_cfg.get("strong_score", 0.90))
    weak_score = float(scoring_cfg.get("weak_score", 0.40))

    # Loop-invariant candidate positions, converted once.
    have_cands = not cands.empty
    if have_cands:
        cand_x = cands["cx"].astype(float).to_numpy()
        cand_y = cands["cy"].astype(float).to_numpy()

    columns = [
        "id",
        "sheet_id",
        "lat",
        "lon",
        "row",
        "col",
        "nearest_candidate_index",
        "nearest_candidate_distance_px",
        "nearest_candidate_style",
        "nearest_candidate_score",
        "coordinate_score",
        "decision",
    ]

    rh = open_georaster(map_path=map_path, tif_path=tif_path)
    try:
        rows: List[dict] = []
        for _, pt in coords.iterrows():
            lat = float(pt["lat"])
            lon = float(pt["lon"])
            rc = coord_to_rowcol(rh.dataset, lon, lat, coord_crs=coord_crs)
            if rc is None:
                continue
            row, col = rc
            if not (0 <= row < rh.height and 0 <= col < rh.width):
                continue

            score = 0.0
            nearest_id = None
            nearest_dist = None
            nearest_style = None
            nearest_det_score = None
            decision = "auto_negative"

            if have_cands:
                dx = cand_x - col
                dy = cand_y - row
                dist = np.sqrt(dx * dx + dy * dy)
                i = int(np.argmin(dist))
                # The distance is reported even when the candidate is out of range.
                nearest_dist = float(dist[i])
                if nearest_dist <= radius:
                    cand = cands.iloc[i]
                    nearest_id = i
                    nearest_style = str(cand.get("fill_style", "unknown"))
                    nearest_det_score = float(cand.get("score", 0.0))
                    # A zero radius only admits exact hits; give those the
                    # full distance factor instead of dividing 0 by 0.
                    if radius > 0:
                        dist_factor = max(0.0, 1.0 - nearest_dist / radius)
                    else:
                        dist_factor = 1.0
                    score = float(0.55 * nearest_det_score + 0.45 * dist_factor)
                    if score >= strong_score:
                        decision = "auto_positive"
                    elif score >= weak_score:
                        decision = "review"

            rows.append({
                "id": pt["id"],
                "sheet_id": sheet_id,
                "lat": lat,
                "lon": lon,
                "row": row,
                "col": col,
                "nearest_candidate_index": nearest_id,
                "nearest_candidate_distance_px": nearest_dist,
                "nearest_candidate_style": nearest_style,
                "nearest_candidate_score": nearest_det_score,
                "coordinate_score": score,
                "decision": decision,
            })

        out_csv = Path(out_dir) / f"{sheet_id}_coordinate_scores.csv"
        # Explicit columns keep the header present when `rows` is empty;
        # a header-less file cannot be read back with pd.read_csv.
        pd.DataFrame(rows, columns=columns).to_csv(out_csv, index=False)
        LOG.info("Wrote coordinate scores: %s", out_csv)
        return out_csv
    finally:
        rh.close()
def extract_coordinate_crops(
    coord_scores_csv: str | Path,
    map_path: str | None,
    tif_path: str | None,
    out_dir: str | Path,
    crop_size: int = 256,
    only_decisions: tuple[str, ...] = ("review", "auto_positive"),
) -> Path:
    """Save a marked PNG crop around every scored coordinate worth inspecting.

    For each row of *coord_scores_csv* whose ``decision`` is listed in
    *only_decisions*, a ``crop_size`` x ``crop_size`` window centred on the
    row's ``row``/``col`` pixel is read from the raster, a red circle is drawn
    at the crop centre, and the image is written to *out_dir* as
    ``<id>__<decision>__score_<coordinate_score>.png``.

    Args:
        coord_scores_csv: CSV with ``id``, ``decision``, ``row``, ``col`` and
            ``coordinate_score`` columns. A completely empty file is treated
            as "no rows".
        map_path: Passed to ``open_georaster``.
        tif_path: Passed to ``open_georaster``.
        out_dir: Directory that receives the PNG files.
        crop_size: Edge length of the crop in pixels.
        only_decisions: Decisions to export; a single string is accepted too.

    Returns:
        *out_dir* as a ``Path``.
    """
    out_dir = ensure_dir(out_dir)
    try:
        df = pd.read_csv(coord_scores_csv)
    except pd.errors.EmptyDataError:
        # A score file written from zero rows may have no header line at all;
        # there is simply nothing to crop.
        df = pd.DataFrame()

    # A bare string would turn the membership test below into a substring match.
    if isinstance(only_decisions, str):
        only_decisions = (only_decisions,)

    rh = open_georaster(map_path=map_path, tif_path=tif_path)
    half = crop_size // 2
    try:
        for _, r in df.iterrows():
            decision = str(r["decision"])
            if decision not in only_decisions:
                continue
            row, col = int(r["row"]), int(r["col"])
            # Window arguments: column offset, row offset, width, height.
            win = Window(col - half, row - half, crop_size, crop_size)
            rgb = read_window_rgb(rh.dataset, win)
            img = Image.fromarray(rgb).convert("RGB")
            # NOTE(review): the marker position assumes read_window_rgb returns
            # a full crop_size x crop_size array even at the raster edge - confirm.
            draw = ImageDraw.Draw(img)
            draw.ellipse([half - 5, half - 5, half + 5, half + 5], outline=(255, 0, 0), width=2)
            # Ids come from user data; a path separator would point the file
            # name into a sub-directory that does not exist.
            safe_id = str(r["id"]).replace("/", "_").replace("\\", "_")
            name = f"{safe_id}__{decision}__score_{float(r['coordinate_score']):.3f}.png"
            img.save(Path(out_dir) / name)
        LOG.info("Wrote crops into: %s", out_dir)
        return Path(out_dir)
    finally:
        rh.close()