{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# 01 — Příprava datasetu VisDrone\n", "\n", "**VisDrone2019-DET** je veřejně dostupný dataset leteckých snímků s anotacemi vozidel.\n", "Obsahuje třídy: `pedestrian, people, bicycle, car, van, truck, tricycle, awning-tricycle, bus, motor`.\n", "\n", "My použijeme pouze vozidlové třídy a převedeme anotace do formátu YOLO.\n", "\n", "**Citace:**\n", "> Zhu, P., Wen, L., Du, D., et al. (2021). Detection and Tracking Meet Drones Challenge. *IEEE TPAMI*." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager, possibly rendering your system unusable.It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv. Use the --root-user-action option if you know what you are doing and want to suppress this warning.\u001b[0m\u001b[33m\n", "\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m26.0.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython -m pip install --upgrade pip\u001b[0m\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [ "%pip install ultralytics Pillow tqdm requests pyyaml pandas matplotlib --quiet" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Stahuji train.zip ...\n", "Rozbaluji train.zip ...\n", " OK → data/visdrone/VisDrone2019-DET-train\n", "Stahuji val.zip ...\n", "Rozbaluji val.zip ...\n", " OK → data/visdrone/VisDrone2019-DET-val\n", "Stahuji test.zip ...\n", "Rozbaluji test.zip ...\n", " OK → data/visdrone/VisDrone2019-DET-test-dev\n", "Dataset připraven.\n" ] } ], "source": [ "from pathlib import Path\n", "import shutil, zipfile, urllib.request\n", "from tqdm.auto import tqdm\n", "\n", "DATA_DIR = Path(\"data/visdrone\")\n", "DATA_DIR.mkdir(parents=True, exist_ok=True)\n", "\n", "# Mapování split → název složky (VisDrone používá plné názvy)\n", "SPLIT_DIRS = {\n", " \"train\": DATA_DIR / \"VisDrone2019-DET-train\",\n", " \"val\": DATA_DIR / \"VisDrone2019-DET-val\",\n", " \"test\": DATA_DIR / \"VisDrone2019-DET-test-dev\",\n", "}\n", "\n", "# URLs pro stažení (pokud složky ještě neexistují)\n", "SPLIT_URLS = {\n", " \"train\": \"https://github.com/ultralytics/assets/releases/download/v0.0.0/VisDrone2019-DET-train.zip\",\n", " \"val\": \"https://github.com/ultralytics/assets/releases/download/v0.0.0/VisDrone2019-DET-val.zip\",\n", " \"test\": \"https://github.com/ultralytics/assets/releases/download/v0.0.0/VisDrone2019-DET-test-dev.zip\",\n", "}\n", "\n", "for split, split_dir in SPLIT_DIRS.items():\n", " if split_dir.exists():\n", " print(f\" {split}: složka existuje → {split_dir}\")\n", " continue\n", " zip_path = DATA_DIR / f\"{split}.zip\"\n", " if not zip_path.exists():\n", " print(f\"Stahuji {split}.zip ...\")\n", " urllib.request.urlretrieve(SPLIT_URLS[split], zip_path)\n", " print(f\"Rozbaluji {zip_path.name} ...\")\n", " with zipfile.ZipFile(zip_path) as zf:\n", " zf.extractall(DATA_DIR)\n", " print(f\" OK → {split_dir}\")\n", "\n", "print(\"Dataset připraven.\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "train: 6471 snímků, 6471 anotací → VisDrone2019-DET-train\n", "val : 548 snímků, 548 anotací → VisDrone2019-DET-val\n", "test : 1610 snímků, 1610 anotací → VisDrone2019-DET-test-dev\n" ] } ], "source": [ "# Prozkoumání struktury datasetu\n", "for split, split_dir in SPLIT_DIRS.items():\n", " if split_dir.exists():\n", " imgs = list((split_dir / \"images\").glob(\"*.jpg\"))\n", " anns = list((split_dir / \"annotations\").glob(\"*.txt\"))\n", " print(f\"{split:5s}: {len(imgs):4d} snímků, {len(anns):4d} anotací → {split_dir.name}\")\n", " else:\n", " print(f\"{split}: CHYBÍ ({split_dir})\")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Soubor: 9999999_00887_d_0000407.txt\n", " x y w h score cat trunc occ\n", "0 1138 1332 53 108 0 11 0 0\n", "1 1240 1245 47 88 0 11 0 0\n", "2 1115 1324 10 33 1 1 0 0\n", "3 1132 1245 14 31 1 1 0 0\n", "4 1120 1196 14 19 1 1 0 0\n", "5 1250 1111 9 26 1 1 0 0\n", "6 1179 1072 8 20 1 1 0 1\n", "7 1194 1069 8 20 1 1 0 0\n", "8 1257 971 9 18 1 1 0 0\n", "9 1236 906 6 13 1 1 0 0\n", "\n", "Rozdělení tříd: {1: 17, 11: 3, 6: 2, 5: 2}\n" ] } ], "source": [ "# Ukázkový anotační soubor\n", "# Formát: ,,,,,,,\n", "import pandas as pd\n", "\n", "ann_dir = SPLIT_DIRS[\"train\"] / \"annotations\"\n", "sample_ann = next(ann_dir.glob(\"*.txt\"))\n", "df = pd.read_csv(sample_ann, header=None,\n", " names=[\"x\",\"y\",\"w\",\"h\",\"score\",\"cat\",\"trunc\",\"occ\"])\n", "print(f\"Soubor: {sample_ann.name}\")\n", "print(df.head(10))\n", "print(\"\\nRozdělení tříd:\", df[\"cat\"].value_counts().to_dict())" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "e58b070b30ca45bc93aa7a275925bd75", "version_major": 2, "version_minor": 0 }, "text/plain": [ "train: 0%| | 0/6471 [00:00" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Uloženo: sample_annotations.png\n" ] } ], "source": [ "# Vizualizace ukázkových anotací\n", "import matplotlib.pyplot as plt\n", "import matplotlib.patches as patches\n", "import random\n", "\n", "COLORS = [\"#00FF00\", \"#FFFF00\", \"#FF0000\", \"#0080FF\"] # car, van, truck, bus\n", "\n", "def show_sample(split=\"train\", n=4):\n", " img_dir = YOLO_DIR / split / \"images\"\n", " lbl_dir = YOLO_DIR / split / \"labels\"\n", " # Jen obrázky s neprázdnými labely\n", " imgs = [\n", " p for p in img_dir.glob(\"*.jpg\")\n", " if (lbl_dir / p.with_suffix(\".txt\").name).exists()\n", " and (lbl_dir / p.with_suffix(\".txt\").name).stat().st_size > 0\n", " ]\n", " samples = random.sample(imgs, min(n, len(imgs)))\n", "\n", " fig, axes = plt.subplots(1, len(samples), figsize=(5*len(samples), 5))\n", " if len(samples) == 1:\n", " axes = [axes]\n", "\n", " for ax, img_path in zip(axes, samples):\n", " img = Image.open(img_path)\n", " iw, ih = img.size\n", " ax.imshow(img)\n", " lbl_path = lbl_dir / img_path.with_suffix(\".txt\").name\n", " for line in lbl_path.read_text().splitlines():\n", " if not line.strip():\n", " continue\n", " cls, cx, cy, nw, nh = map(float, line.split())\n", " cls = int(cls)\n", " x = (cx - nw/2) * iw\n", " y = (cy - nh/2) * ih\n", " w = nw * iw\n", " h = nh * ih\n", " rect = patches.Rectangle((x, y), w, h, linewidth=1.5,\n", " edgecolor=COLORS[cls], facecolor=\"none\")\n", " ax.add_patch(rect)\n", " ax.text(x, y-2, CLASS_NAMES[cls], color=COLORS[cls], fontsize=7)\n", " ax.set_title(img_path.stem[:20], fontsize=8)\n", " ax.axis(\"off\")\n", "\n", " plt.suptitle(\"Ukázkové anotace (zelená=car, žlutá=van, červená=truck, modrá=bus)\")\n", " plt.tight_layout()\n", " plt.savefig(\"sample_annotations.png\", dpi=150, bbox_inches=\"tight\")\n", " plt.show()\n", " print(f\"Uloženo: sample_annotations.png\")\n", "\n", "show_sample()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "train (6471 snímků):\n", " car : 144866\n", " van : 24956\n", " truck : 12875\n", " bus : 5926\n", " CELKEM : 188623\n", "\n", "val (548 snímků):\n", " car : 14064\n", " van : 1975\n", " truck : 750\n", " bus : 251\n", " CELKEM : 17040\n" ] } ], "source": [ "# Statistiky datasetu\n", "from collections import Counter\n", "\n", "for split in [\"train\", \"val\"]:\n", " lbl_dir = YOLO_DIR / split / \"labels\"\n", " counter = Counter()\n", " n_imgs = 0\n", " for lbl in lbl_dir.glob(\"*.txt\"):\n", " lines = [l for l in lbl.read_text().splitlines() if l.strip()]\n", " for line in lines:\n", " counter[int(line.split()[0])] += 1\n", " n_imgs += 1\n", " print(f\"\\n{split} ({n_imgs} snímků):\")\n", " for cls_id, cnt in sorted(counter.items()):\n", " print(f\" {CLASS_NAMES[cls_id]:10s}: {cnt:6d}\")\n", " print(f\" {'CELKEM':10s}: {sum(counter.values()):6d}\")" ] } ], "metadata": { "kernelspec": { "display_name": ".venv (3.14.4)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.14.4" } }, "nbformat": 4, "nbformat_minor": 4 }