151 lines
5.6 KiB
Python
151 lines
5.6 KiB
Python
from __future__ import annotations
|
|
|
|
import logging
|
|
from pathlib import Path
|
|
from typing import Dict, List, Optional
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
from PIL import Image, ImageDraw
|
|
from pyproj import Transformer
|
|
from rasterio.windows import Window
|
|
|
|
from .georef import open_georaster, read_window_rgb
|
|
from .utils import ensure_dir
|
|
|
|
LOG = logging.getLogger(__name__)
|
|
|
|
|
|
def _normalize_coordinate_columns(df: pd.DataFrame) -> pd.DataFrame:
|
|
cols = {c.lower().strip(): c for c in df.columns}
|
|
lat_col = cols.get("lat") or cols.get("latitude") or cols.get("y")
|
|
lon_col = cols.get("lon") or cols.get("lng") or cols.get("longitude") or cols.get("x")
|
|
if not lat_col or not lon_col:
|
|
raise ValueError("Coordinate CSV needs lat/lon columns, or latitude/longitude, or y/x.")
|
|
out = df.copy()
|
|
out["lat"] = pd.to_numeric(out[lat_col], errors="coerce")
|
|
out["lon"] = pd.to_numeric(out[lon_col], errors="coerce")
|
|
if "id" not in out.columns:
|
|
out["id"] = [f"pt_{i:06d}" for i in range(len(out))]
|
|
return out.dropna(subset=["lat", "lon"])
|
|
|
|
|
|
def load_coordinates(path: str | Path) -> pd.DataFrame:
    """Read the coordinate CSV at *path* and normalise its columns.

    The file is parsed with ``pandas.read_csv`` and the result is passed
    through ``_normalize_coordinate_columns``, whose return value is returned
    unchanged.
    """
    raw_table = pd.read_csv(path)
    return _normalize_coordinate_columns(raw_table)
|
|
|
|
|
|
def coord_to_rowcol(ds, lon: float, lat: float, coord_crs: str = "EPSG:4326") -> Optional[tuple[int, int]]:
    """Convert a coordinate in *coord_crs* to a ``(row, col)`` index on *ds*.

    Args:
        ds: Dataset object exposing ``crs`` and ``index(x, y)``.
        lon: X / longitude value, expressed in *coord_crs*.
        lat: Y / latitude value, expressed in *coord_crs*.
        coord_crs: CRS of the input coordinate.

    Returns:
        ``(row, col)`` as plain ints, or ``None`` when the dataset has no CRS
        or when any step of the conversion raises.
    """
    target_crs = ds.crs
    if target_crs is None:
        return None

    try:
        # always_xy=True: the transformer takes (lon, lat) and yields (x, y).
        to_raster = Transformer.from_crs(coord_crs, target_crs, always_xy=True)
        map_x, map_y = to_raster.transform(lon, lat)
        r, c = ds.index(map_x, map_y)
        result = (int(r), int(c))
    except Exception as exc:  # noqa: BLE001
        # Best-effort lookup: a failed conversion is reported as "no position".
        LOG.debug("coord_to_rowcol failed: %s", exc)
        return None
    return result
|
|
|
|
|
|
def score_coordinates_for_sheet(
    coord_csv: str | Path,
    candidates_csv: str | Path,
    map_path: str | None,
    tif_path: str | None,
    sheet_id: str,
    cfg: Dict,
    out_dir: str | Path,
    coord_crs: str = "EPSG:4326",
) -> Path:
    """Score each coordinate against the nearest detected candidate on a sheet.

    Every coordinate from *coord_csv* is projected onto the raster opened via
    ``open_georaster``. Points that cannot be projected or that fall outside
    the raster are skipped. For the remaining points the nearest candidate
    (by Euclidean pixel distance to its ``cx``/``cy``) is looked up; if it
    lies within ``search_radius_px`` the point receives

        ``0.55 * candidate_score + 0.45 * (1 - distance / radius)``

    and is labelled ``auto_positive`` (score >= ``strong_score``), ``review``
    (score >= ``weak_score``) or ``auto_negative`` otherwise.

    Args:
        coord_csv: CSV of coordinates, read with ``load_coordinates``.
        candidates_csv: CSV of candidates with ``cx`` and ``cy`` columns and
            optional ``score`` / ``fill_style`` columns.
        map_path: Forwarded to ``open_georaster``.
        tif_path: Forwarded to ``open_georaster``.
        sheet_id: Written to every output row and used in the file name.
        cfg: Configuration; ``cfg["coordinate_scoring"]`` may define
            ``search_radius_px`` (default 45), ``strong_score`` (default 0.90)
            and ``weak_score`` (default 0.40).
        out_dir: Output directory, passed through ``ensure_dir``.
        coord_crs: CRS of the coordinates in *coord_csv*.

    Returns:
        Path of the written ``<sheet_id>_coordinate_scores.csv``. The file
        always has a header row, even when no point was scored.

    Raises:
        KeyError: If ``cfg`` has no ``coordinate_scoring`` entry, or the
            candidates table is non-empty but lacks ``cx``/``cy``.
    """
    out_dir = ensure_dir(out_dir)
    coords = load_coordinates(coord_csv)
    cands = pd.read_csv(candidates_csv)

    # Resolve all settings before the raster is opened so that a malformed
    # cfg cannot leave the raster handle open.
    scoring_cfg = cfg["coordinate_scoring"]
    radius = float(scoring_cfg.get("search_radius_px", 45))
    strong_score = float(scoring_cfg.get("strong_score", 0.90))
    weak_score = float(scoring_cfg.get("weak_score", 0.40))

    # Candidate centres do not depend on the point: convert them once.
    cand_cx = cand_cy = None
    if not cands.empty:
        cand_cx = cands["cx"].astype(float).to_numpy()
        cand_cy = cands["cy"].astype(float).to_numpy()

    # Explicit column list so an empty result still produces a parseable CSV
    # (a header-less file makes pandas.read_csv raise EmptyDataError).
    columns = [
        "id",
        "sheet_id",
        "lat",
        "lon",
        "row",
        "col",
        "nearest_candidate_index",
        "nearest_candidate_distance_px",
        "nearest_candidate_style",
        "nearest_candidate_score",
        "coordinate_score",
        "decision",
    ]

    rh = open_georaster(map_path=map_path, tif_path=tif_path)
    try:
        rows: List[dict] = []
        # Iterate column-wise instead of iterrows(): iterrows() builds one
        # Series per row and may upcast integer ids to float.
        for pt_id, pt_lat, pt_lon in zip(coords["id"], coords["lat"], coords["lon"]):
            lat = float(pt_lat)
            lon = float(pt_lon)
            rc = coord_to_rowcol(rh.dataset, lon, lat, coord_crs=coord_crs)
            if rc is None:
                continue
            row, col = rc
            if not (0 <= row < rh.height and 0 <= col < rh.width):
                continue

            score = 0.0
            nearest_id = None
            nearest_dist = None
            nearest_style = None
            nearest_det_score = None
            decision = "auto_negative"

            if cand_cx is not None:
                dx = cand_cx - col
                dy = cand_cy - row
                dist = np.sqrt(dx * dx + dy * dy)
                i = int(np.argmin(dist))
                nearest_dist = float(dist[i])
                if nearest_dist <= radius:
                    cand = cands.iloc[i]
                    nearest_id = i
                    nearest_style = str(cand.get("fill_style", "unknown"))
                    nearest_det_score = float(cand.get("score", 0.0))
                    # radius == 0 only admits exact hits; treat them as a
                    # perfect distance match instead of dividing by zero.
                    if radius <= 0:
                        dist_factor = 1.0
                    else:
                        dist_factor = max(0.0, 1.0 - nearest_dist / radius)
                    score = float(0.55 * nearest_det_score + 0.45 * dist_factor)
                    if score >= strong_score:
                        decision = "auto_positive"
                    elif score >= weak_score:
                        decision = "review"

            rows.append({
                "id": pt_id,
                "sheet_id": sheet_id,
                "lat": lat,
                "lon": lon,
                "row": row,
                "col": col,
                "nearest_candidate_index": nearest_id,
                "nearest_candidate_distance_px": nearest_dist,
                "nearest_candidate_style": nearest_style,
                "nearest_candidate_score": nearest_det_score,
                "coordinate_score": score,
                "decision": decision,
            })

        out_csv = Path(out_dir) / f"{sheet_id}_coordinate_scores.csv"
        pd.DataFrame(rows, columns=columns).to_csv(out_csv, index=False)
        LOG.info("Wrote coordinate scores: %s", out_csv)
        return out_csv
    finally:
        rh.close()
|
|
|
|
|
|
def extract_coordinate_crops(
    coord_scores_csv: str | Path,
    map_path: str | None,
    tif_path: str | None,
    out_dir: str | Path,
    crop_size: int = 256,
    only_decisions: tuple[str, ...] = ("review", "auto_positive"),
) -> Path:
    """Save a PNG crop around every scored coordinate with a selected decision.

    For each row of *coord_scores_csv* whose ``decision`` is listed in
    *only_decisions*, a ``crop_size`` x ``crop_size`` window centred on the
    row's ``row``/``col`` is read through ``read_window_rgb``, a red circle is
    drawn at the crop's nominal centre, and the image is saved as
    ``<id>__<decision>__score_<coordinate_score>.png``.

    Args:
        coord_scores_csv: CSV with ``id``, ``decision``, ``row``, ``col`` and
            ``coordinate_score`` columns.
        map_path: Forwarded to ``open_georaster``.
        tif_path: Forwarded to ``open_georaster``.
        out_dir: Output directory, passed through ``ensure_dir``.
        crop_size: Width and height of the requested window, in pixels.
        only_decisions: Decision labels for which crops are produced.

    Returns:
        The output directory as a ``Path``.
    """
    out_dir = ensure_dir(out_dir)
    scores = pd.read_csv(coord_scores_csv)
    raster = open_georaster(map_path=map_path, tif_path=tif_path)
    half = crop_size // 2
    try:
        target_dir = Path(out_dir)
        # Marker: 10 px wide circle around the window centre.
        # NOTE(review): assumes read_window_rgb returns a full crop_size
        # image even at raster edges - confirm against its implementation.
        marker_box = [half - 5, half - 5, half + 5, half + 5]

        for _, rec in scores.iterrows():
            decision = str(rec.decision)
            if decision not in only_decisions:
                continue

            centre_row = int(rec.row)
            centre_col = int(rec.col)
            window = Window(centre_col - half, centre_row - half, crop_size, crop_size)
            pixels = read_window_rgb(raster.dataset, window)

            crop = Image.fromarray(pixels).convert("RGB")
            ImageDraw.Draw(crop).ellipse(marker_box, outline=(255, 0, 0), width=2)

            file_name = f"{rec.id!s}__{decision}__score_{float(rec.coordinate_score):.3f}.png"
            crop.save(target_dir / file_name)

        LOG.info("Wrote crops into: %s", out_dir)
        return target_dir
    finally:
        raster.close()
|