rnd-v1
This commit is contained in:
150
bgtopo_poc/coordinates.py
Normal file
150
bgtopo_poc/coordinates.py
Normal file
@@ -0,0 +1,150 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from PIL import Image, ImageDraw
|
||||
from pyproj import Transformer
|
||||
from rasterio.windows import Window
|
||||
|
||||
from .georef import open_georaster, read_window_rgb
|
||||
from .utils import ensure_dir
|
||||
|
||||
# Module-level logger, named after this module via __name__.
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _normalize_coordinate_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with numeric ``lat``/``lon`` columns and an ``id`` column.

    Column labels are matched case-insensitively and ignoring surrounding
    whitespace. Latitude is taken from the first of ``lat``, ``latitude``,
    ``y`` that exists; longitude from the first of ``lon``, ``lng``,
    ``longitude``, ``x``. Values that cannot be parsed as numbers become NaN
    and those rows are dropped from the result.

    When *df* has no column named exactly ``id``, ids of the form
    ``pt_000000`` are generated from the row position *before* invalid rows
    are dropped, so a generated id always points back at the original row.

    Args:
        df: Raw coordinate table; it is not modified.

    Returns:
        A new DataFrame holding the original columns plus ``lat``, ``lon``
        and ``id``, restricted to rows with a usable latitude and longitude.

    Raises:
        ValueError: If no latitude column or no longitude column is found.
    """
    # Labels are stringified before normalising so that non-string labels
    # (e.g. integer column names) are simply never matched instead of
    # crashing on ``.lower()``.
    by_key = {str(label).lower().strip(): label for label in df.columns}

    def first_match(*keys):
        # First alias present among the normalised labels, else None.
        return next((by_key[key] for key in keys if key in by_key), None)

    lat_col = first_match("lat", "latitude", "y")
    lon_col = first_match("lon", "lng", "longitude", "x")
    if lat_col is None or lon_col is None:
        raise ValueError("Coordinate CSV needs lat/lon columns, or latitude/longitude, or y/x.")

    out = df.copy()
    out["lat"] = pd.to_numeric(out[lat_col], errors="coerce")
    out["lon"] = pd.to_numeric(out[lon_col], errors="coerce")
    if "id" not in out.columns:
        out["id"] = [f"pt_{i:06d}" for i in range(len(out))]
    return out.dropna(subset=["lat", "lon"])
|
||||
|
||||
|
||||
def load_coordinates(path: str | Path) -> pd.DataFrame:
    """Read the coordinate CSV at *path* and return it in normalised form.

    The file is parsed with ``pandas.read_csv`` using default options and the
    resulting frame is passed through ``_normalize_coordinate_columns``;
    whatever that helper returns or raises is propagated unchanged.
    """
    raw_table = pd.read_csv(path)
    return _normalize_coordinate_columns(raw_table)
|
||||
|
||||
|
||||
def coord_to_rowcol(ds, lon: float, lat: float, coord_crs: str = "EPSG:4326") -> Optional[tuple[int, int]]:
    """Map a point given in *coord_crs* to a ``(row, col)`` index on *ds*.

    Args:
        ds: Dataset exposing ``crs`` and ``index(x, y)`` (presumably an open
            rasterio dataset; confirm against the caller).
        lon: X / longitude of the point, expressed in *coord_crs*.
        lat: Y / latitude of the point, expressed in *coord_crs*.
        coord_crs: CRS of the input point.

    Returns:
        ``(row, col)`` as plain ints, or ``None`` when the dataset has no CRS
        or when any step of the conversion raises. No bounds check is done
        here, so the index may lie outside the raster.
    """
    if ds.crs is None:
        return None

    result: Optional[tuple[int, int]]
    try:
        # always_xy=True keeps the (lon, lat) argument order used below.
        to_dataset = Transformer.from_crs(coord_crs, ds.crs, always_xy=True)
        easting, northing = to_dataset.transform(lon, lat)
        pixel_row, pixel_col = ds.index(easting, northing)
        result = (int(pixel_row), int(pixel_col))
    except Exception as exc:  # noqa: BLE001
        # Best-effort conversion: a failure is reported at debug level and
        # signalled to the caller as None.
        LOG.debug("coord_to_rowcol failed: %s", exc)
        result = None
    return result
|
||||
|
||||
|
||||
def _nearest_candidate(cand_x, cand_y, row, col):
    """Return ``(index, distance_px)`` of the candidate closest to pixel (row, col).

    ``cand_x`` / ``cand_y`` are equally long 1-D float arrays of candidate
    centres. Candidates whose distance is NaN (a missing centre) are ignored;
    ``None`` is returned when there is no candidate with a usable distance.
    """
    if cand_x.size == 0:
        return None
    dx = cand_x - col
    dy = cand_y - row
    dist = np.sqrt(dx * dx + dy * dy)
    if np.isnan(dist).all():
        return None
    # Plain argmin would return the position of a NaN; nanargmin skips them.
    best = int(np.nanargmin(dist))
    return best, float(dist[best])


def score_coordinates_for_sheet(
    coord_csv: str | Path,
    candidates_csv: str | Path,
    map_path: str | None,
    tif_path: str | None,
    sheet_id: str,
    cfg: Dict,
    out_dir: str | Path,
    coord_crs: str = "EPSG:4326",
) -> Path:
    """Score every coordinate that falls on a sheet against its nearest candidate.

    Each point from *coord_csv* is converted to a pixel position on the
    raster opened from *map_path* / *tif_path*. Points that cannot be
    converted, or that land outside the raster, are skipped. For the
    remaining points the nearest row of *candidates_csv* (columns ``cx`` and
    ``cy``, in pixels) is looked up. If it lies within ``search_radius_px``
    the point gets

        ``0.55 * candidate["score"] + 0.45 * (1 - distance / radius)``

    and is labelled ``auto_positive`` (score >= ``strong_score``), ``review``
    (score >= ``weak_score``) or ``auto_negative``. Points without a
    candidate in range keep score ``0.0`` and ``auto_negative``.

    Args:
        coord_csv: CSV of points, read through ``load_coordinates``.
        candidates_csv: CSV of candidates; ``fill_style`` and ``score`` are
            optional columns (defaults ``"unknown"`` and ``0.0``).
        map_path: Passed to ``open_georaster``.
        tif_path: Passed to ``open_georaster``.
        sheet_id: Written to every row and used in the output file name.
        cfg: Configuration; ``cfg["coordinate_scoring"]`` may define
            ``search_radius_px`` (default 45), ``strong_score`` (default
            0.90) and ``weak_score`` (default 0.40).
        out_dir: Output directory, created via ``ensure_dir``.
        coord_crs: CRS of the points in *coord_csv*.

    Returns:
        Path of ``<sheet_id>_coordinate_scores.csv``. The file always has a
        header row, even when no point lands on the sheet.

    Raises:
        KeyError: If *cfg* has no ``coordinate_scoring`` section, or a
            non-empty candidates table lacks ``cx`` / ``cy``. Both are
            checked before the raster is opened.
    """
    columns = [
        "id",
        "sheet_id",
        "lat",
        "lon",
        "row",
        "col",
        "nearest_candidate_index",
        "nearest_candidate_distance_px",
        "nearest_candidate_style",
        "nearest_candidate_score",
        "coordinate_score",
        "decision",
    ]

    out_dir = ensure_dir(out_dir)
    coords = load_coordinates(coord_csv)
    cands = pd.read_csv(candidates_csv)

    # Everything that can fail on bad input is resolved before the raster is
    # opened, so a bad config or candidates file cannot leak the raster handle.
    scoring_cfg = cfg["coordinate_scoring"]
    radius = float(scoring_cfg.get("search_radius_px", 45))
    strong = float(scoring_cfg.get("strong_score", 0.90))
    weak = float(scoring_cfg.get("weak_score", 0.40))

    # Candidate centres are loop-invariant: convert them once.
    if cands.empty:
        cand_x = cand_y = np.empty(0, dtype=float)
    else:
        cand_x = cands["cx"].astype(float).to_numpy()
        cand_y = cands["cy"].astype(float).to_numpy()

    rh = open_georaster(map_path=map_path, tif_path=tif_path)
    try:
        rows: List[dict] = []
        # Column-wise iteration keeps each id's own dtype; iterrows() would
        # upcast integer ids to float when every column is numeric.
        for pt_id, pt_lat, pt_lon in zip(coords["id"], coords["lat"], coords["lon"]):
            lat = float(pt_lat)
            lon = float(pt_lon)
            rc = coord_to_rowcol(rh.dataset, lon, lat, coord_crs=coord_crs)
            if rc is None:
                continue
            row, col = rc
            if not (0 <= row < rh.height and 0 <= col < rh.width):
                continue

            score = 0.0
            nearest_id = None
            nearest_dist = None
            nearest_style = None
            nearest_det_score = None
            decision = "auto_negative"

            nearest = _nearest_candidate(cand_x, cand_y, row, col)
            if nearest is not None:
                i, d = nearest
                nearest_dist = d
                # Only a candidate inside the search radius can lift the
                # point above the auto_negative default.
                if d <= radius:
                    cand = cands.iloc[i]
                    nearest_id = i
                    nearest_style = str(cand.get("fill_style", "unknown"))
                    nearest_det_score = float(cand.get("score", 0.0))
                    # radius == 0 only admits exact hits; treat them as a
                    # perfect distance match instead of dividing by zero.
                    dist_factor = max(0.0, 1.0 - d / radius) if radius > 0 else 1.0
                    score = float(0.55 * nearest_det_score + 0.45 * dist_factor)
                    if score >= strong:
                        decision = "auto_positive"
                    elif score >= weak:
                        decision = "review"

            rows.append({
                "id": pt_id,
                "sheet_id": sheet_id,
                "lat": lat,
                "lon": lon,
                "row": row,
                "col": col,
                "nearest_candidate_index": nearest_id,
                "nearest_candidate_distance_px": nearest_dist,
                "nearest_candidate_style": nearest_style,
                "nearest_candidate_score": nearest_det_score,
                "coordinate_score": score,
                "decision": decision,
            })

        out_csv = Path(out_dir) / f"{sheet_id}_coordinate_scores.csv"
        # Explicit columns guarantee a header row even with zero rows, so the
        # file can always be read back with pandas.read_csv.
        pd.DataFrame(rows, columns=columns).to_csv(out_csv, index=False)
        LOG.info("Wrote coordinate scores: %s", out_csv)
        return out_csv
    finally:
        rh.close()
|
||||
|
||||
|
||||
def extract_coordinate_crops(
    coord_scores_csv: str | Path,
    map_path: str | None,
    tif_path: str | None,
    out_dir: str | Path,
    crop_size: int = 256,
    only_decisions: tuple[str, ...] = ("review", "auto_positive"),
) -> Path:
    """Save a marked image crop for every scored coordinate worth inspecting.

    For each row of *coord_scores_csv* whose ``decision`` is listed in
    *only_decisions*, a ``crop_size`` x ``crop_size`` window centred on the
    row's ``row`` / ``col`` pixel is read from the raster, a small red circle
    is drawn at the crop centre, and the image is saved as
    ``<id>__<decision>__score_<coordinate_score>.png`` in *out_dir*.

    Args:
        coord_scores_csv: CSV with at least the columns ``id``, ``decision``,
            ``row``, ``col`` and ``coordinate_score``.
        map_path: Passed to ``open_georaster``.
        tif_path: Passed to ``open_georaster``.
        out_dir: Output directory, created via ``ensure_dir``.
        crop_size: Edge length of each crop in pixels.
        only_decisions: Decisions for which a crop is written.

    Returns:
        The output directory as a ``Path``. A scores file without any header
        or rows yields no crops and does not open the raster.
    """
    out_dir = ensure_dir(out_dir)
    try:
        df = pd.read_csv(coord_scores_csv)
    except pd.errors.EmptyDataError:
        # A scores file with no header at all (e.g. one written from an empty
        # DataFrame) cannot be parsed; treat it as "nothing to crop".
        LOG.info("No coordinate scores in %s; no crops written", coord_scores_csv)
        return Path(out_dir)

    rh = open_georaster(map_path=map_path, tif_path=tif_path)
    half = crop_size // 2
    # Loop-invariant: bounding box of the marker circle at the crop centre.
    marker_box = [half - 5, half - 5, half + 5, half + 5]
    try:
        for _, r in df.iterrows():
            if str(r.decision) not in only_decisions:
                continue
            row, col = int(r.row), int(r.col)
            # NOTE(review): near the raster edge this window starts at a
            # negative offset or runs past the raster; whether
            # read_window_rgb pads such reads is not visible here, confirm.
            win = Window(col - half, row - half, crop_size, crop_size)
            rgb = read_window_rgb(rh.dataset, win)
            img = Image.fromarray(rgb).convert("RGB")
            draw = ImageDraw.Draw(img)
            draw.ellipse(marker_box, outline=(255, 0, 0), width=2)
            name = f"{str(r.id)}__{str(r.decision)}__score_{float(r.coordinate_score):.3f}.png"
            img.save(Path(out_dir) / name)
        LOG.info("Wrote crops into: %s", out_dir)
        return Path(out_dir)
    finally:
        rh.close()
|
||||
Reference in New Issue
Block a user