{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# 01 — Příprava datasetu VisDrone\n", "\n", "**VisDrone2019-DET** je veřejně dostupný dataset leteckých snímků s anotacemi vozidel.\n", "Obsahuje třídy: `pedestrian, people, bicycle, car, van, truck, tricycle, awning-tricycle, bus, motor`.\n", "\n", "My použijeme pouze vozidlové třídy a převedeme anotace do formátu YOLO.\n", "\n", "**Citace:**\n", "> Zhu, P., Wen, L., Du, D., et al. (2021). Detection and Tracking Meet Drones Challenge. *IEEE TPAMI*." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [ "# Instalace závislostí\n", "%pip install ultralytics Pillow tqdm requests --quiet" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Stahuji train.zip ...\n" ] } ], "source": [ "from pathlib import Path\n", "import shutil, os, zipfile, urllib.request\n", "from tqdm.notebook import tqdm\n", "\n", "DATA_DIR = Path(\"data/visdrone\")\n", "DATA_DIR.mkdir(parents=True, exist_ok=True)\n", "\n", "# VisDrone 2019 DET — stáhni ze stránek http://aiskyeye.com\n", "# nebo z Google Drive zrcadla:\n", "SPLITS = {\n", " \"train\": \"https://github.com/ultralytics/assets/releases/download/v0.0.0/VisDrone2019-DET-train.zip\",\n", " \"val\": \"https://github.com/ultralytics/assets/releases/download/v0.0.0/VisDrone2019-DET-val.zip\",\n", " \"test\": \"https://github.com/ultralytics/assets/releases/download/v0.0.0/VisDrone2019-DET-test-dev.zip\",\n", "}\n", "\n", "def download_with_progress(url: str, dest: Path):\n", " if dest.exists():\n", " print(f\" Už existuje: {dest.name}\")\n", " return\n", " print(f\"Stahuji {dest.name} ...\")\n", " urllib.request.urlretrieve(url, dest)\n", " print(f\" OK → {dest}\")\n", "\n", "for split, url in SPLITS.items():\n", " zip_path = DATA_DIR / f\"{split}.zip\"\n", " download_with_progress(url, zip_path)\n", " split_dir = DATA_DIR / split\n", " if not split_dir.exists():\n", " print(f\"Rozbaluji {zip_path.name} ...\")\n", " with zipfile.ZipFile(zip_path) as zf:\n", " zf.extractall(DATA_DIR)\n", " print(f\" Hotovo → {split_dir}\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Prozkoumání struktury datasetu\n", "for split in [\"train\", \"val\", \"test\"]:\n", " d = DATA_DIR / split\n", " if d.exists():\n", " imgs = list(d.glob(\"images/*.jpg\"))\n", " anns = list(d.glob(\"annotations/*.txt\"))\n", " print(f\"{split:5s}: {len(imgs):4d} snímků, {len(anns):4d} anotací\")\n", " else:\n", " print(f\"{split}: chybí\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Ukázkový anotační soubor — formát VisDrone:\n", "# ,,,,,,,\n", "import pandas as pd\n", "\n", "sample_ann = next((DATA_DIR / \"train\" / \"annotations\").glob(\"*.txt\"))\n", "df = pd.read_csv(sample_ann, header=None,\n", " names=[\"x\",\"y\",\"w\",\"h\",\"score\",\"cat\",\"trunc\",\"occ\"])\n", "print(f\"Soubor: {sample_ann.name}\")\n", "print(df.head(10))\n", "print(\"\\nRozdělení tříd v celém souboru:\", df[\"cat\"].value_counts().to_dict())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from PIL import Image\n", "import numpy as np\n", "\n", "# VisDrone kategorie → náš mapovací slovník\n", "# 0=ignored, 1=pedestrian, 2=people, 3=bicycle, 4=car,\n", "# 5=van, 6=truck, 7=tricycle, 8=awning-tricycle, 9=bus, 10=motor, 11=others\n", "\n", "# Zachováme jen vozidlové třídy, přemapujeme na 4 naše třídy:\n", "# 0=car, 1=van, 2=truck, 3=bus\n", "VISDRONE_TO_YOLO = {\n", " 4: 0, # car → car\n", " 5: 1, # van → van\n", " 6: 2, # truck → truck\n", " 9: 3, # bus → bus\n", "}\n", "CLASS_NAMES = [\"car\", \"van\", \"truck\", \"bus\"]\n", "\n", "YOLO_DIR = Path(\"data/yolo_visdrone\")\n", "\n", "def convert_split(split: str):\n", " src_img = DATA_DIR / split / \"images\"\n", " src_ann = DATA_DIR / split / \"annotations\"\n", " dst_img = YOLO_DIR / split / \"images\"\n", " dst_lbl = YOLO_DIR / split / \"labels\"\n", " dst_img.mkdir(parents=True, exist_ok=True)\n", " dst_lbl.mkdir(parents=True, exist_ok=True)\n", "\n", " ann_files = sorted(src_ann.glob(\"*.txt\"))\n", " skipped = 0\n", "\n", " for ann_path in tqdm(ann_files, desc=split):\n", " img_path = src_img / ann_path.with_suffix(\".jpg\").name\n", " if not img_path.exists():\n", " skipped += 1\n", " continue\n", "\n", " img = Image.open(img_path)\n", " iw, ih = img.size\n", "\n", " rows = []\n", " with open(ann_path) as f:\n", " for line in f:\n", " parts = line.strip().split(\",\")\n", " if len(parts) < 6:\n", " continue\n", " x, y, w, h = int(parts[0]), int(parts[1]), int(parts[2]), int(parts[3])\n", " cat = int(parts[5])\n", " if cat not in VISDRONE_TO_YOLO:\n", " continue\n", " if w <= 0 or h <= 0:\n", " continue\n", " yolo_cls = VISDRONE_TO_YOLO[cat]\n", " cx = (x + w / 2) / iw\n", " cy = (y + h / 2) / ih\n", " nw = w / iw\n", " nh = h / ih\n", " rows.append(f\"{yolo_cls} {cx:.6f} {cy:.6f} {nw:.6f} {nh:.6f}\")\n", "\n", " # Kopiuj obrázek a ulož label (i prázdný label je OK pro neg. sample)\n", " shutil.copy(img_path, dst_img / img_path.name)\n", " lbl_out = dst_lbl / ann_path.name\n", " lbl_out.write_text(\"\\n\".join(rows))\n", "\n", " print(f\" {split}: {len(ann_files)-skipped} zpracováno, {skipped} přeskočeno\")\n", "\n", "for split in [\"train\", \"val\"]:\n", " convert_split(split)\n", "print(\"Konverze hotova!\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Vytvoření YAML konfigurace pro YOLOv8\n", "import yaml\n", "\n", "cfg = {\n", " \"path\": str(YOLO_DIR.resolve()),\n", " \"train\": \"train/images\",\n", " \"val\": \"val/images\",\n", " \"names\": {i: n for i, n in enumerate(CLASS_NAMES)},\n", " \"nc\": len(CLASS_NAMES),\n", "}\n", "\n", "yaml_path = YOLO_DIR / \"dataset.yaml\"\n", "with open(yaml_path, \"w\") as f:\n", " yaml.dump(cfg, f, allow_unicode=True, sort_keys=False)\n", "\n", "print(f\"Dataset YAML: {yaml_path}\")\n", "print(yaml.dump(cfg, allow_unicode=True))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Vizualizace ukázkových anotací\n", "import matplotlib.pyplot as plt\n", "import matplotlib.patches as patches\n", "import random\n", "\n", "COLORS = [\"#00FF00\", \"#FFFF00\", \"#FF0000\", \"#0080FF\"] # car, van, truck, bus\n", "\n", "def show_sample(split=\"train\", n=4):\n", " img_dir = YOLO_DIR / split / \"images\"\n", " lbl_dir = YOLO_DIR / split / \"labels\"\n", " imgs = [p for p in img_dir.glob(\"*.jpg\") if (lbl_dir / p.with_suffix(\".txt\").name).stat().st_size > 0]\n", " samples = random.sample(imgs, min(n, len(imgs)))\n", "\n", " fig, axes = plt.subplots(1, len(samples), figsize=(5*len(samples), 5))\n", " if len(samples) == 1:\n", " axes = [axes]\n", "\n", " for ax, img_path in zip(axes, samples):\n", " img = Image.open(img_path)\n", " iw, ih = img.size\n", " ax.imshow(img)\n", " lbl_path = lbl_dir / img_path.with_suffix(\".txt\").name\n", " for line in lbl_path.read_text().splitlines():\n", " if not line.strip():\n", " continue\n", " cls, cx, cy, nw, nh = map(float, line.split())\n", " cls = int(cls)\n", " x = (cx - nw/2) * iw\n", " y = (cy - nh/2) * ih\n", " w = nw * iw\n", " h = nh * ih\n", " rect = patches.Rectangle((x, y), w, h, linewidth=1.5,\n", " edgecolor=COLORS[cls], facecolor=\"none\")\n", " ax.add_patch(rect)\n", " ax.text(x, y-2, CLASS_NAMES[cls], color=COLORS[cls], fontsize=7)\n", " ax.set_title(img_path.stem[:20], fontsize=8)\n", " ax.axis(\"off\")\n", "\n", " plt.suptitle(\"Ukázkové anotace (zelená=car, žlutá=van, červená=truck, modrá=bus)\")\n", " plt.tight_layout()\n", " plt.savefig(\"sample_annotations.png\", dpi=150, bbox_inches=\"tight\")\n", " plt.show()\n", "\n", "show_sample()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Statistiky datasetu\n", "from collections import Counter\n", "\n", "for split in [\"train\", \"val\"]:\n", " lbl_dir = YOLO_DIR / split / \"labels\"\n", " counter = Counter()\n", " n_imgs = 0\n", " for lbl in lbl_dir.glob(\"*.txt\"):\n", " lines = [l for l in lbl.read_text().splitlines() if l.strip()]\n", " for line in lines:\n", " counter[int(line.split()[0])] += 1\n", " n_imgs += 1\n", " print(f\"\\n{split} ({n_imgs} snímků):\")\n", " for cls_id, cnt in sorted(counter.items()):\n", " print(f\" {CLASS_NAMES[cls_id]:10s}: {cnt:6d}\")\n", " print(f\" {'CELKEM':10s}: {sum(counter.values()):6d}\")" ] } ], "metadata": { "kernelspec": { "display_name": ".venv (3.14.4)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.14.4" } }, "nbformat": 4, "nbformat_minor": 4 }