This commit is contained in:
Lukáš Trkan
2026-04-22 13:19:46 +02:00
parent d7620979ee
commit 46492f2e23
3113 changed files with 2911 additions and 186 deletions

120
prepare_dataset.py Normal file
View File

@@ -0,0 +1,120 @@
#!/usr/bin/env python3
"""
Prepare the dataset that is uploaded to the server for training.

Always included:      data/lanov_custom/
Included if present:  data/hk_custom/
                      data/yolo_visdrone/ (classes 0-3 only)

Output: dataset_lanov.zip
    lanov/train/images+labels, lanov/val/images+labels, lanov/dataset.yaml
    best.pt
"""
import random
import shutil
import zipfile
from pathlib import Path

# Fixed seed so that repeated runs over the same inputs shuffle identically.
SEED = 42
random.seed(SEED)

# Share of the annotated samples that goes into the validation split.
VAL_RATIO = 0.2
# Upper bound on background (empty-label) samples taken for each split.
BG_PER_SPLIT = 50
# Class names written to dataset.yaml; the list index is the class id.
CLASS_NAMES = ["car", "van", "truck", "bus"]

# Staging directory and the archive built from it.
OUT = Path("dataset_lanov")
ZIP_OUT = Path("dataset_lanov.zip")
# ---------------------------------------------------------------------------
# Data sources: (image dir, label dir, prefix that keeps file names unique)
# ---------------------------------------------------------------------------
def count_annotated(lbl_dir):
    """Return how many ``*.txt`` files directly inside *lbl_dir* are non-empty."""
    return sum(
        1 for label in Path(lbl_dir).glob("*.txt") if label.stat().st_size > 0
    )


def find_optional_source(name, path):
    """Build the source entry for an optional dataset rooted at *path*.

    Args:
        name: short source name; used in the log line and as the file prefix.
        path: directory expected to contain ``images/`` and ``labels/``.

    Returns:
        ``(img_dir, lbl_dir, name)`` with both directories as strings when
        both exist, otherwise ``None``. A one-line status is printed either way.
    """
    img_dir = Path(path) / "images"
    lbl_dir = Path(path) / "labels"
    if not (img_dir.exists() and lbl_dir.exists()):
        print(f"- {name}: nenalezeno, přeskočeno")
        return None
    print(f"+ {name}: {count_annotated(lbl_dir)} anotovaných dlaždic")
    return (str(img_dir), str(lbl_dir), name)


# The first source is added unconditionally.
SOURCES = [
    ("data/lanov_custom/train/images", "data/lanov_custom/train/labels", "lanov"),
]

# Optional sources — same YOLO format, same classes.
for name, path in [
    ("hk", "data/hk_custom/train"),
    ("visdrone", "data/yolo_visdrone/train"),
]:
    source = find_optional_source(name, path)
    if source is not None:
        SOURCES.append(source)
# ---------------------------------------------------------------------------
# Load the label files of every source and split them into train / val
# ---------------------------------------------------------------------------
def find_image(img_dir, stem):
    """Return the ``.jpg`` (preferred) or ``.png`` image for *stem*, or ``None``."""
    for suffix in (".jpg", ".png"):
        candidate = Path(img_dir) / (stem + suffix)
        if candidate.exists():
            return candidate
    return None


def collect_entries(sources):
    """Scan every source and classify its label files.

    Args:
        sources: iterable of ``(img_dir, lbl_dir, prefix)`` triples.

    Returns:
        ``(annotated, background, skipped)``. The first two are lists of
        ``(lbl_path, img_path, prefix)`` tuples for non-empty and empty label
        files respectively. ``skipped`` maps a prefix to the number of label
        files dropped because no image with the same stem exists.
    """
    annotated = []
    background = []
    skipped = {}
    for img_dir, lbl_dir, prefix in sources:
        # Sorted so the pre-shuffle order does not depend on directory order.
        for lbl in sorted(Path(lbl_dir).glob("*.txt")):
            img = find_image(img_dir, lbl.stem)
            if img is None:
                # A label without its image adds nothing to the dataset but
                # would distort the counts, the val ratio and the background
                # quota, so it is left out.
                skipped[prefix] = skipped.get(prefix, 0) + 1
                continue
            entry = (lbl, img, prefix)
            if lbl.stat().st_size > 0:
                annotated.append(entry)
            else:
                background.append(entry)
    return annotated, background, skipped


all_annotated, all_background, skipped_labels = collect_entries(SOURCES)
for skipped_prefix, skipped_count in skipped_labels.items():
    print(f"! {skipped_prefix}: {skipped_count} labelů bez obrázku, přeskočeno")

# Shuffle order matters: both calls draw from the same seeded generator.
random.shuffle(all_annotated)
random.shuffle(all_background)

# At least one annotated sample goes to val whenever any exist.
n_val = max(1, int(len(all_annotated) * VAL_RATIO))
val_ann = all_annotated[:n_val]
train_ann = all_annotated[n_val:]

# Train takes the first BG_PER_SPLIT backgrounds, val the next BG_PER_SPLIT.
train_bg = all_background[:BG_PER_SPLIT]
val_bg = all_background[BG_PER_SPLIT : BG_PER_SPLIT * 2]

print(f"\nAnotované: {len(all_annotated)} (train {len(train_ann)}, val {len(val_ann)})")
print(f"Background: {len(all_background)} (train {len(train_bg)}, val {len(val_bg)})")
# ---------------------------------------------------------------------------
# Copy the selected files into OUT/
# ---------------------------------------------------------------------------
# Remove any previous staging tree before writing the new one.
if OUT.exists():
    shutil.rmtree(OUT)

splits = {
    "train": train_ann + train_bg,
    "val": val_ann + val_bg,
}
for split_name, split in splits.items():
    split_root = OUT / split_name
    img_out = split_root / "images"
    lbl_out = split_root / "labels"
    for directory in (img_out, lbl_out):
        directory.mkdir(parents=True, exist_ok=True)

    for lbl_path, img_path, prefix in split:
        # The source prefix keeps equal stems from different sources apart.
        stem = f"{prefix}_{lbl_path.stem}"
        shutil.copy2(lbl_path, lbl_out / (stem + ".txt"))
        if not img_path:
            continue  # entry carries no image; only the label is copied
        shutil.copy2(img_path, img_out / (stem + img_path.suffix))
# ---------------------------------------------------------------------------
# dataset.yaml
# ---------------------------------------------------------------------------
# NOTE(review): `path: .` is a relative dataset root; confirm how the training
# tool resolves it when the YAML is passed as lanov/dataset.yaml.
yaml_text = (
    "path: .\n"
    "train: train/images\n"
    "val: val/images\n"
    f"nc: {len(CLASS_NAMES)}\n"
    f"names: {CLASS_NAMES}\n"
)
yaml_path = OUT / "dataset.yaml"
yaml_path.write_text(yaml_text)
# ---------------------------------------------------------------------------
# ZIP
# ---------------------------------------------------------------------------
# Drop any archive left by an earlier run before writing the new one.
if ZIP_OUT.exists():
    ZIP_OUT.unlink()

weights = Path("best.pt")
with zipfile.ZipFile(ZIP_OUT, "w", zipfile.ZIP_DEFLATED) as zf:
    # Everything staged under OUT/ is stored below the "lanov/" folder.
    for f in sorted(OUT.rglob("*")):
        if f.is_file():
            zf.write(f, "lanov/" + f.relative_to(OUT).as_posix())
    if weights.exists():
        zf.write(weights, "best.pt")
        print("Přidáno: best.pt")
    else:
        # The train command printed below refers to best.pt, so report its
        # absence instead of skipping it silently.
        print("- best.pt: nenalezeno, přeskočeno")

size_mb = ZIP_OUT.stat().st_size / 1e6
print(f"\nHotovo: {ZIP_OUT} ({size_mb:.1f} MB)")
print("\nNa serveru spusť:")
print(" unzip dataset_lanov.zip")
print(" pip install ultralytics")
print(" yolo train model=best.pt data=lanov/dataset.yaml epochs=50 imgsz=256 batch=32")