{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# 01 — Příprava datasetu VisDrone\n", "\n", "**VisDrone2019-DET** je veřejně dostupný dataset leteckých snímků s anotacemi vozidel.\n", "Obsahuje třídy: `pedestrian, people, bicycle, car, van, truck, tricycle, awning-tricycle, bus, motor`.\n", "\n", "My použijeme pouze vozidlové třídy a převedeme anotace do formátu YOLO.\n", "\n", "**Citace:**\n", "> Zhu, P., Wen, L., Du, D., et al. (2021). Detection and Tracking Meet Drones Challenge. *IEEE TPAMI*." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [ "%pip install ultralytics Pillow tqdm requests pyyaml pandas matplotlib --quiet" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " train: složka existuje → data/visdrone/VisDrone2019-DET-train\n", " val: složka existuje → data/visdrone/VisDrone2019-DET-val\n", " test: složka existuje → data/visdrone/VisDrone2019-DET-test-dev\n", "Dataset připraven.\n" ] } ], "source": [ "from pathlib import Path\n", "import shutil, zipfile, urllib.request\n", "from tqdm.auto import tqdm\n", "\n", "DATA_DIR = Path(\"data/visdrone\")\n", "DATA_DIR.mkdir(parents=True, exist_ok=True)\n", "\n", "# Mapování split → název složky (VisDrone používá plné názvy)\n", "SPLIT_DIRS = {\n", " \"train\": DATA_DIR / \"VisDrone2019-DET-train\",\n", " \"val\": DATA_DIR / \"VisDrone2019-DET-val\",\n", " \"test\": DATA_DIR / \"VisDrone2019-DET-test-dev\",\n", "}\n", "\n", "# URLs pro stažení (pokud složky ještě neexistují)\n", "SPLIT_URLS = {\n", " \"train\": \"https://github.com/ultralytics/assets/releases/download/v0.0.0/VisDrone2019-DET-train.zip\",\n", " \"val\": \"https://github.com/ultralytics/assets/releases/download/v0.0.0/VisDrone2019-DET-val.zip\",\n", " \"test\": \"https://github.com/ultralytics/assets/releases/download/v0.0.0/VisDrone2019-DET-test-dev.zip\",\n", "}\n", "\n", "for split, split_dir in SPLIT_DIRS.items():\n", " if split_dir.exists():\n", " print(f\" {split}: složka existuje → {split_dir}\")\n", " continue\n", " zip_path = DATA_DIR / f\"{split}.zip\"\n", " if not zip_path.exists():\n", " print(f\"Stahuji {split}.zip ...\")\n", " urllib.request.urlretrieve(SPLIT_URLS[split], zip_path)\n", " print(f\"Rozbaluji {zip_path.name} ...\")\n", " with zipfile.ZipFile(zip_path) as zf:\n", " zf.extractall(DATA_DIR)\n", " print(f\" OK → {split_dir}\")\n", "\n", "print(\"Dataset připraven.\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "train: 6471 snímků, 6471 anotací → VisDrone2019-DET-train\n", "val : 548 snímků, 548 anotací → VisDrone2019-DET-val\n", "test : 1610 snímků, 1610 anotací → VisDrone2019-DET-test-dev\n" ] } ], "source": [ "# Prozkoumání struktury datasetu\n", "for split, split_dir in SPLIT_DIRS.items():\n", " if split_dir.exists():\n", " imgs = list((split_dir / \"images\").glob(\"*.jpg\"))\n", " anns = list((split_dir / \"annotations\").glob(\"*.txt\"))\n", " print(f\"{split:5s}: {len(imgs):4d} snímků, {len(anns):4d} anotací → {split_dir.name}\")\n", " else:\n", " print(f\"{split}: CHYBÍ ({split_dir})\")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Soubor: 9999943_00000_d_0000051.txt\n", " x y w h score cat trunc occ\n", "0 266 41 43 33 1 4 0 0\n", "1 243 4 41 35 1 4 0 0\n", "2 93 307 46 48 1 5 0 0\n", "3 125 307 28 33 1 5 0 1\n", "4 296 104 41 35 1 5 0 0\n", "5 357 168 44 28 1 4 0 0\n", "6 387 133 35 45 1 4 0 0\n", "7 409 120 34 43 1 4 0 0\n", "8 392 179 40 40 1 4 0 0\n", "9 702 474 18 47 1 4 0 0\n", "\n", "Rozdělení tříd: {4: 47, 1: 24, 5: 10, 0: 7, 9: 2, 6: 2, 10: 2, 2: 1}\n" ] } ], "source": [ "# Ukázkový anotační soubor\n", "# Formát: ,,,,,,,\n", "import pandas as pd\n", "\n", "ann_dir = SPLIT_DIRS[\"train\"] / \"annotations\"\n", "sample_ann = next(ann_dir.glob(\"*.txt\"))\n", "df = pd.read_csv(sample_ann, header=None,\n", " names=[\"x\",\"y\",\"w\",\"h\",\"score\",\"cat\",\"trunc\",\"occ\"])\n", "print(f\"Soubor: {sample_ann.name}\")\n", "print(df.head(10))\n", "print(\"\\nRozdělení tříd:\", df[\"cat\"].value_counts().to_dict())" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "07441c9ee3cd4cb883cc359614ea69f1", "version_major": 2, "version_minor": 0 }, "text/plain": [ "train: 0%| | 0/6471 [00:00" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Uloženo: sample_annotations.png\n" ] } ], "source": [ "# Vizualizace ukázkových anotací\n", "import matplotlib.pyplot as plt\n", "import matplotlib.patches as patches\n", "import random\n", "\n", "COLORS = [\"#00FF00\", \"#FFFF00\", \"#FF0000\", \"#0080FF\"] # car, van, truck, bus\n", "\n", "def show_sample(split=\"train\", n=4):\n", " img_dir = YOLO_DIR / split / \"images\"\n", " lbl_dir = YOLO_DIR / split / \"labels\"\n", " # Jen obrázky s neprázdnými labely\n", " imgs = [\n", " p for p in img_dir.glob(\"*.jpg\")\n", " if (lbl_dir / p.with_suffix(\".txt\").name).exists()\n", " and (lbl_dir / p.with_suffix(\".txt\").name).stat().st_size > 0\n", " ]\n", " samples = random.sample(imgs, min(n, len(imgs)))\n", "\n", " fig, axes = plt.subplots(1, len(samples), figsize=(5*len(samples), 5))\n", " if len(samples) == 1:\n", " axes = [axes]\n", "\n", " for ax, img_path in zip(axes, samples):\n", " img = Image.open(img_path)\n", " iw, ih = img.size\n", " ax.imshow(img)\n", " lbl_path = lbl_dir / img_path.with_suffix(\".txt\").name\n", " for line in lbl_path.read_text().splitlines():\n", " if not line.strip():\n", " continue\n", " cls, cx, cy, nw, nh = map(float, line.split())\n", " cls = int(cls)\n", " x = (cx - nw/2) * iw\n", " y = (cy - nh/2) * ih\n", " w = nw * iw\n", " h = nh * ih\n", " rect = patches.Rectangle((x, y), w, h, linewidth=1.5,\n", " edgecolor=COLORS[cls], facecolor=\"none\")\n", " ax.add_patch(rect)\n", " ax.text(x, y-2, CLASS_NAMES[cls], color=COLORS[cls], fontsize=7)\n", " ax.set_title(img_path.stem[:20], fontsize=8)\n", " ax.axis(\"off\")\n", "\n", " plt.suptitle(\"Ukázkové anotace (zelená=car, žlutá=van, červená=truck, modrá=bus)\")\n", " plt.tight_layout()\n", " plt.savefig(\"sample_annotations.png\", dpi=150, bbox_inches=\"tight\")\n", " plt.show()\n", " print(f\"Uloženo: sample_annotations.png\")\n", "\n", "show_sample()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "train (6471 snímků):\n", " car : 144866\n", " van : 24956\n", " truck : 12875\n", " bus : 5926\n", " CELKEM : 188623\n", "\n", "val (548 snímků):\n", " car : 14064\n", " van : 1975\n", " truck : 750\n", " bus : 251\n", " CELKEM : 17040\n" ] } ], "source": [ "# Statistiky datasetu\n", "from collections import Counter\n", "\n", "for split in [\"train\", \"val\"]:\n", " lbl_dir = YOLO_DIR / split / \"labels\"\n", " counter = Counter()\n", " n_imgs = 0\n", " for lbl in lbl_dir.glob(\"*.txt\"):\n", " lines = [l for l in lbl.read_text().splitlines() if l.strip()]\n", " for line in lines:\n", " counter[int(line.split()[0])] += 1\n", " n_imgs += 1\n", " print(f\"\\n{split} ({n_imgs} snímků):\")\n", " for cls_id, cnt in sorted(counter.items()):\n", " print(f\" {CLASS_NAMES[cls_id]:10s}: {cnt:6d}\")\n", " print(f\" {'CELKEM':10s}: {sum(counter.values()):6d}\")" ] } ], "metadata": { "kernelspec": { "display_name": ".venv (3.14.4)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.14.4" } }, "nbformat": 4, "nbformat_minor": 4 }