rnd-v1
This commit is contained in:
117
bgtopo_poc/export_yolo.py
Normal file
117
bgtopo_poc/export_yolo.py
Normal file
@@ -0,0 +1,117 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import random
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from typing import Dict
|
||||
|
||||
import pandas as pd
|
||||
from PIL import Image
|
||||
|
||||
from .utils import ensure_dir
|
||||
|
||||
# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)

# YOLO class index for each candidate fill style, numbered in listing order.
# Styles missing from this table are mapped to class 0 by the label writer.
STYLE_TO_CLASS = {
    style: index
    for index, style in enumerate(("unknown", "filled", "hollow", "border"))
}
|
||||
|
||||
|
||||
def _crop_image_and_labels(img: Image.Image, boxes: pd.DataFrame, x0: int, y0: int, size: int):
    """Cut one square tile out of ``img`` and build its YOLO label lines.

    Args:
        img: Source image; the tile is taken with ``img.crop`` and converted
            to RGB.
        boxes: Candidate boxes in source-image pixels. Reads the columns
            ``x``/``y`` (minimum corner), ``w``/``h`` (extent) and, when
            present, ``fill_style``.
        x0: Left edge of the tile in source pixels.
        y0: Top edge of the tile in source pixels.
        size: Side length of the tile in pixels.

    Returns:
        ``(crop, labels)``. ``labels`` holds one
        ``"<class> <cx> <cy> <w> <h>"`` string per kept box, in table order,
        describing the part of the box that lies inside the tile, with all
        four values divided by ``size``. A box is kept only when at least 35%
        of its area (area floored at 1 px²) is visible in the tile. Rows with
        non-finite geometry are skipped.
    """
    crop = img.crop((x0, y0, x0 + size, y0 + size)).convert("RGB")
    labels = []
    if boxes.empty:
        return crop, labels

    # Box edges as float arrays; working on arrays avoids a Python-level
    # iterrows() pass over the whole candidate table for every tile.
    left = boxes["x"].to_numpy(dtype=float)
    top = boxes["y"].to_numpy(dtype=float)
    right = left + boxes["w"].to_numpy(dtype=float)
    bottom = top + boxes["h"].to_numpy(dtype=float)

    # Intersection of every box with the tile rectangle.
    vis_left = left.clip(min=x0)
    vis_top = top.clip(min=y0)
    vis_right = right.clip(max=x0 + size)
    vis_bottom = bottom.clip(max=y0 + size)
    vis_w = vis_right - vis_left
    vis_h = vis_bottom - vis_top

    # Floor the area at 1 so degenerate boxes cannot divide by zero.
    box_area = ((right - left) * (bottom - top)).clip(min=1)

    # Comparisons with NaN are False, so rows with missing geometry drop out
    # here instead of being written as "nan" label lines.
    keep = (vis_w > 0) & (vis_h > 0) & ((vis_w * vis_h) / box_area >= 0.35)
    if not keep.any():
        return crop, labels

    centre_x = ((vis_left[keep] + vis_right[keep]) / 2 - x0) / size
    centre_y = ((vis_top[keep] + vis_bottom[keep]) / 2 - y0) / size
    norm_w = vis_w[keep] / size
    norm_h = vis_h[keep] / size

    if "fill_style" in boxes.columns:
        styles = boxes["fill_style"].to_numpy()[keep].tolist()
    else:
        styles = ["unknown"] * int(keep.sum())

    for style, cx, cy, w, h in zip(
        styles, centre_x.tolist(), centre_y.tolist(), norm_w.tolist(), norm_h.tolist()
    ):
        # Unrecognised (or missing) styles fall back to class 0.
        cls = STYLE_TO_CLASS.get(str(style), 0)
        labels.append(f"{cls} {cx:.6f} {cy:.6f} {w:.6f} {h:.6f}")
    return crop, labels
|
||||
|
||||
|
||||
def _tile_origins(extent: int, tile_size: int, step: int) -> list[int]:
    """Return tile start offsets along one axis so the tiles reach the far edge."""
    origins = list(range(0, max(1, extent - tile_size + 1), step))
    # The regular grid stops short of the far edge unless
    # (extent - tile_size) is a multiple of step; add one flush tile.
    last = extent - tile_size
    if last > origins[-1]:
        origins.append(last)
    return origins


def export_candidates_to_yolo(
    tif_path: str | Path,
    candidates_csv: str | Path,
    out_dir: str | Path,
    cfg: Dict,
    sheet_id: str,
    tile_size: int = 1024,
    overlap: int = 128,
    val_fraction: float = 0.20,
    include_empty_tiles: bool = True,
    max_empty_tiles: int = 250,
) -> Path:
    """Tile a map sheet and write the tiles plus labels as a YOLO dataset.

    The sheet is cut into ``tile_size`` squares on a grid with stride
    ``tile_size - overlap`` (at least 1), plus a final row/column flush with
    the bottom/right edge so the whole sheet is covered. Each exported tile is
    saved as ``<sheet_id>_<x0>_<y0>.jpg`` with a matching ``.txt`` label file
    under ``images/<split>`` and ``labels/<split>``, and a ``data.yaml`` is
    written at the dataset root.

    Args:
        tif_path: Path of the source image.
        candidates_csv: CSV of candidate boxes. Must provide ``cx``/``cy``
            (used to decide whether a tile contains candidates) and the
            columns read by ``_crop_image_and_labels``.
        out_dir: Dataset root; split directories are created beneath it.
        cfg: Configuration mapping; only ``cfg["export"]["yolo_class_names"]``
            is read, with a built-in default list when absent.
        sheet_id: Prefix used in tile file names.
        tile_size: Tile side length in pixels.
        overlap: Overlap between neighbouring grid tiles in pixels.
        val_fraction: Probability that a tile is assigned to the ``val`` split.
        include_empty_tiles: Whether tiles without any candidate centre may be
            exported at all.
        max_empty_tiles: Upper bound on exported tiles without a candidate
            centre.

    Returns:
        Path of the written ``data.yaml``.
    """
    out_dir = Path(out_dir)
    for split in ("train", "val"):
        ensure_dir(out_dir / "images" / split)
        ensure_dir(out_dir / "labels" / split)

    # Image.open keeps the file open until closed; convert() returns an
    # independent in-memory image, so the handle can be released right away.
    with Image.open(tif_path) as src:
        img = src.convert("RGB")
    boxes = pd.read_csv(candidates_csv)
    step = max(1, tile_size - overlap)

    # Private, fixed-seed generator: reproducible sampling and splitting
    # without reseeding the process-wide `random` module state.
    rng = random.Random(42)
    empty_written = 0
    total_written = 0

    x_origins = _tile_origins(img.width, tile_size, step)
    y_origins = _tile_origins(img.height, tile_size, step)

    for y0 in y_origins:
        # The row test does not depend on x0, so evaluate it once per row.
        in_row = (boxes.cy >= y0) & (boxes.cy < y0 + tile_size)
        for x0 in x_origins:
            in_tile = in_row & (boxes.cx >= x0) & (boxes.cx < x0 + tile_size)
            if not in_tile.any():
                if not include_empty_tiles or empty_written >= max_empty_tiles:
                    continue
                # Keep some empty/hard-negative tiles to stop the model from
                # detecting all blue map details (about 8% of them).
                if rng.random() > 0.08:
                    continue
                empty_written += 1

            # Label against the full table: a box whose centre lies in a
            # neighbouring tile can still be sufficiently visible in this one.
            crop, labels = _crop_image_and_labels(img, boxes, x0, y0, tile_size)
            split = "val" if rng.random() < val_fraction else "train"
            stem = f"{sheet_id}_{x0}_{y0}"
            crop.save(out_dir / "images" / split / f"{stem}.jpg", quality=92)
            with open(out_dir / "labels" / split / f"{stem}.txt", "w", encoding="utf-8") as f:
                f.write("\n".join(labels))
            total_written += 1

    data_yaml = out_dir / "data.yaml"
    names = cfg.get("export", {}).get("yolo_class_names", ["blue_rect_unknown", "blue_rect_filled", "blue_rect_hollow", "blue_rect_border"])
    with open(data_yaml, "w", encoding="utf-8") as f:
        f.write(f"path: {out_dir.resolve()}\n")
        f.write("train: images/train\n")
        f.write("val: images/val\n")
        f.write("names:\n")
        for i, name in enumerate(names):
            f.write(f" {i}: {name}\n")
    LOG.info("YOLO export complete: %s (%d tiles)", data_yaml, total_written)
    return data_yaml
|
||||
|
||||
|
||||
def merge_yolo_datasets(src_dirs: list[str | Path], out_dir: str | Path) -> Path:
    """Copy the train/val tiles and labels of several YOLO datasets into one tree.

    Only ``images/<split>/*.jpg`` and ``labels/<split>/*.txt`` are copied;
    no ``data.yaml`` is written here. Files keep their names, so a file from
    a later source replaces an earlier one with the same name.

    Args:
        src_dirs: Roots of the datasets to merge, processed in the given order.
        out_dir: Root of the merged dataset; split directories are created.

    Returns:
        ``out_dir`` as a ``Path``.
    """
    target = Path(out_dir)
    splits = ("train", "val")
    # (sub-directory, glob pattern) pairs, images first and then labels.
    kinds = (("images", "*.jpg"), ("labels", "*.txt"))

    for split_name in splits:
        ensure_dir(target / "images" / split_name)
        ensure_dir(target / "labels" / split_name)

    for source in map(Path, src_dirs):
        for split_name in splits:
            for kind, pattern in kinds:
                destination = target / kind / split_name
                for item in (source / kind / split_name).glob(pattern):
                    shutil.copy2(item, destination / item.name)
    return target
|
||||
Reference in New Issue
Block a user