from __future__ import annotations import logging from pathlib import Path from typing import Dict, List, Optional import numpy as np import pandas as pd from PIL import Image, ImageDraw from pyproj import Transformer from rasterio.windows import Window from .georef import open_georaster, read_window_rgb from .utils import ensure_dir LOG = logging.getLogger(__name__) def _normalize_coordinate_columns(df: pd.DataFrame) -> pd.DataFrame: cols = {c.lower().strip(): c for c in df.columns} lat_col = cols.get("lat") or cols.get("latitude") or cols.get("y") lon_col = cols.get("lon") or cols.get("lng") or cols.get("longitude") or cols.get("x") if not lat_col or not lon_col: raise ValueError("Coordinate CSV needs lat/lon columns, or latitude/longitude, or y/x.") out = df.copy() out["lat"] = pd.to_numeric(out[lat_col], errors="coerce") out["lon"] = pd.to_numeric(out[lon_col], errors="coerce") if "id" not in out.columns: out["id"] = [f"pt_{i:06d}" for i in range(len(out))] return out.dropna(subset=["lat", "lon"]) def load_coordinates(path: str | Path) -> pd.DataFrame: return _normalize_coordinate_columns(pd.read_csv(path)) def coord_to_rowcol(ds, lon: float, lat: float, coord_crs: str = "EPSG:4326") -> Optional[tuple[int, int]]: if ds.crs is None: return None try: transformer = Transformer.from_crs(coord_crs, ds.crs, always_xy=True) x, y = transformer.transform(lon, lat) row, col = ds.index(x, y) return int(row), int(col) except Exception as e: # noqa: BLE001 LOG.debug("coord_to_rowcol failed: %s", e) return None def score_coordinates_for_sheet( coord_csv: str | Path, candidates_csv: str | Path, map_path: str | None, tif_path: str | None, sheet_id: str, cfg: Dict, out_dir: str | Path, coord_crs: str = "EPSG:4326", ) -> Path: out_dir = ensure_dir(out_dir) coords = load_coordinates(coord_csv) cands = pd.read_csv(candidates_csv) rh = open_georaster(map_path=map_path, tif_path=tif_path) radius = float(cfg["coordinate_scoring"].get("search_radius_px", 45)) try: rows: List[dict] = [] for _, pt in coords.iterrows(): rc = coord_to_rowcol(rh.dataset, float(pt.lon), float(pt.lat), coord_crs=coord_crs) if rc is None: continue row, col = rc if row < 0 or col < 0 or row >= rh.height or col >= rh.width: continue if cands.empty: nearest = None else: dx = cands["cx"].astype(float).to_numpy() - col dy = cands["cy"].astype(float).to_numpy() - row dist = np.sqrt(dx * dx + dy * dy) i = int(np.argmin(dist)) nearest = (i, float(dist[i])) score = 0.0 nearest_id = None nearest_dist = None nearest_style = None nearest_det_score = None decision = "auto_negative" if nearest: i, d = nearest nearest_dist = d if d <= radius: nearest_id = i nearest_style = str(cands.iloc[i].get("fill_style", "unknown")) nearest_det_score = float(cands.iloc[i].get("score", 0.0)) dist_factor = max(0.0, 1.0 - d / radius) score = float(0.55 * nearest_det_score + 0.45 * dist_factor) if score >= float(cfg["coordinate_scoring"].get("strong_score", 0.90)): decision = "auto_positive" elif score >= float(cfg["coordinate_scoring"].get("weak_score", 0.40)): decision = "review" rows.append({ "id": pt.id, "sheet_id": sheet_id, "lat": float(pt.lat), "lon": float(pt.lon), "row": row, "col": col, "nearest_candidate_index": nearest_id, "nearest_candidate_distance_px": nearest_dist, "nearest_candidate_style": nearest_style, "nearest_candidate_score": nearest_det_score, "coordinate_score": score, "decision": decision, }) out_csv = Path(out_dir) / f"{sheet_id}_coordinate_scores.csv" pd.DataFrame(rows).to_csv(out_csv, index=False) LOG.info("Wrote coordinate scores: %s", out_csv) return out_csv finally: rh.close() def extract_coordinate_crops( coord_scores_csv: str | Path, map_path: str | None, tif_path: str | None, out_dir: str | Path, crop_size: int = 256, only_decisions: tuple[str, ...] = ("review", "auto_positive"), ) -> Path: out_dir = ensure_dir(out_dir) df = pd.read_csv(coord_scores_csv) rh = open_georaster(map_path=map_path, tif_path=tif_path) half = crop_size // 2 try: for _, r in df.iterrows(): if str(r.decision) not in only_decisions: continue row, col = int(r.row), int(r.col) win = Window(col - half, row - half, crop_size, crop_size) rgb = read_window_rgb(rh.dataset, win) img = Image.fromarray(rgb).convert("RGB") draw = ImageDraw.Draw(img) draw.ellipse([half - 5, half - 5, half + 5, half + 5], outline=(255, 0, 0), width=2) name = f"{str(r.id)}__{str(r.decision)}__score_{float(r.coordinate_score):.3f}.png" img.save(Path(out_dir) / name) LOG.info("Wrote crops into: %s", out_dir) return Path(out_dir) finally: rh.close()