greenhouse/yolov3/tutorial.ipynb

1054 lines
52 KiB
Plaintext
Vendored
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "YOLOv3 Tutorial",
"provenance": [],
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"accelerator": "GPU",
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"eeda9d6850e8406f9bbc5b06051b3710": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_1e823c45174a4216be7234a6cc5cfd99",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_cd8efd6c5de94ea8848a7d5b8766a4d6",
"IPY_MODEL_a4ec69c4697c4b0e84e6193be227f63e",
"IPY_MODEL_9a5694c133be46df8d2fe809b77c1c35"
]
}
},
"1e823c45174a4216be7234a6cc5cfd99": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"cd8efd6c5de94ea8848a7d5b8766a4d6": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_d584167143f84a0484006dded3fd2620",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": "100%",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_b9a25c0d425c4fe4b8cd51ae6a301b0d"
}
},
"a4ec69c4697c4b0e84e6193be227f63e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_654525fe1ed34d5fbe1c36ed80ae1c1c",
"_dom_classes": [],
"description": "",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 818322941,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 818322941,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_09544845070e47baafc5e37d45ff23e9"
}
},
"9a5694c133be46df8d2fe809b77c1c35": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_1066f1d5b6104a3dae19f26269745bd0",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 780M/780M [00:03<00:00, 200MB/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_dd3a70e1ef4547ec8d3463749ce06285"
}
},
"d584167143f84a0484006dded3fd2620": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"b9a25c0d425c4fe4b8cd51ae6a301b0d": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"654525fe1ed34d5fbe1c36ed80ae1c1c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"09544845070e47baafc5e37d45ff23e9": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"1066f1d5b6104a3dae19f26269745bd0": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"dd3a70e1ef4547ec8d3463749ce06285": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
}
}
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/ultralytics/yolov3/blob/master/tutorial.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "t6MPjfT5NrKQ"
},
"source": [
"<a align=\"left\" href=\"https://ultralytics.com/yolov3\" target=\"_blank\">\n",
"<img width=\"1024\", src=\"https://user-images.githubusercontent.com/26833433/99805971-90f66b80-2b3d-11eb-80eb-8b45a15cb68e.jpg\"></a>\n",
"\n",
"This is the **official YOLOv3 🚀 notebook** by **Ultralytics**, and is freely available for redistribution under the [GPL-3.0 license](https://choosealicense.com/licenses/gpl-3.0/). \n",
"For more information please visit https://github.com/ultralytics/yolov3 and https://ultralytics.com. Thank you!"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "7mGmQbAO5pQb"
},
"source": [
"# Setup\n",
"\n",
"Clone repo, install dependencies and check PyTorch and GPU."
]
},
{
"cell_type": "code",
"metadata": {
"id": "wbvMlHd_QwMG",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "141002fc-fe49-48d2-a575-2555bf903413"
},
"source": [
"!git clone https://github.com/ultralytics/yolov3 # clone\n",
"%cd yolov3\n",
"%pip install -qr requirements.txt # install\n",
"\n",
"import torch\n",
"from yolov3 import utils\n",
"display = utils.notebook_init() # checks"
],
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Setup complete ✅\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "4JnkELT0cIJg"
},
"source": [
"# 1. Inference\n",
"\n",
"`detect.py` runs YOLOv3 inference on a variety of sources, downloading models automatically from the [latest YOLOv3 release](https://github.com/ultralytics/yolov3/releases), and saving results to `runs/detect`. Example inference sources are:\n",
"\n",
"```shell\n",
"python detect.py --source 0 # webcam\n",
" img.jpg # image \n",
" vid.mp4 # video\n",
" path/ # directory\n",
" path/*.jpg # glob\n",
" 'https://youtu.be/Zgi9g1ksQHc' # YouTube\n",
" 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream\n",
"```"
]
},
{
"cell_type": "code",
"metadata": {
"id": "zR9ZbuQCH7FX",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "c29b082a-8e56-4799-b32a-056425f130d1"
},
"source": [
"!python detect.py --weights yolov3.pt --img 640 --conf 0.25 --source data/images\n",
"# display.Image(filename='runs/detect/exp/zidane.jpg', width=600)"
],
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\u001b[34m\u001b[1mdetect: \u001b[0mweights=['yolov3.pt'], source=data/images, imgsz=[640, 640], conf_thres=0.25, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=False, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/detect, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False\n",
"YOLOv3 🚀 v9.6.0-29-ga441ab1 torch 1.13.0+cu116 CUDA:0 (Tesla T4, 15110MiB)\n",
"\n",
"Fusing layers... \n",
"Model Summary: 261 layers, 61922845 parameters, 0 gradients\n",
"image 1/2 /content/yolov3/data/images/bus.jpg: 640x480 4 persons, 1 bicycle, 1 bus, Done. (0.050s)\n",
"image 2/2 /content/yolov3/data/images/zidane.jpg: 384x640 2 persons, 2 ties, Done. (0.038s)\n",
"Speed: 0.5ms pre-process, 44.3ms inference, 1.3ms NMS per image at shape (1, 3, 640, 640)\n",
"Results saved to \u001b[1mruns/detect/exp2\u001b[0m\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "hkAzDWJ7cWTr"
},
"source": [
"&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\n",
"<img align=\"left\" src=\"https://user-images.githubusercontent.com/26833433/127574988-6a558aa1-d268-44b9-bf6b-62d4c605cc72.jpg\" width=\"600\">"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "0eq1SMWl6Sfn"
},
"source": [
"# 2. Validate\n",
"Validate a model's accuracy on [COCO](https://cocodataset.org/#home) val or test-dev datasets. Models are downloaded automatically from the [latest YOLOv3 release](https://github.com/ultralytics/yolov3/releases). To show results by class use the `--verbose` flag. Note that `pycocotools` metrics may be ~1% better than the equivalent repo metrics, as is visible below, due to slight differences in mAP computation."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "eyTZYGgRjnMc"
},
"source": [
"## COCO val\n",
"Download [COCO val 2017](https://github.com/ultralytics/yolov3/blob/master/data/coco.yaml) dataset (1GB - 5000 images), and test model accuracy."
]
},
{
"cell_type": "code",
"metadata": {
"id": "WQPtK1QYVaD_",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 48,
"referenced_widgets": [
"eeda9d6850e8406f9bbc5b06051b3710",
"1e823c45174a4216be7234a6cc5cfd99",
"cd8efd6c5de94ea8848a7d5b8766a4d6",
"a4ec69c4697c4b0e84e6193be227f63e",
"9a5694c133be46df8d2fe809b77c1c35",
"d584167143f84a0484006dded3fd2620",
"b9a25c0d425c4fe4b8cd51ae6a301b0d",
"654525fe1ed34d5fbe1c36ed80ae1c1c",
"09544845070e47baafc5e37d45ff23e9",
"1066f1d5b6104a3dae19f26269745bd0",
"dd3a70e1ef4547ec8d3463749ce06285"
]
},
"outputId": "56199bac-5a5e-41eb-8892-bf387a1ec7cb"
},
"source": [
"# Download COCO val\n",
"torch.hub.download_url_to_file('https://ultralytics.com/assets/coco2017val.zip', 'tmp.zip')\n",
"!unzip -q tmp.zip -d ../datasets && rm tmp.zip"
],
"execution_count": null,
"outputs": [
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "eeda9d6850e8406f9bbc5b06051b3710",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
" 0%| | 0.00/780M [00:00<?, ?B/s]"
]
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "X58w8JLpMnjH",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "15c92efb-05ec-48e0-b9ef-ff34871354c8"
},
"source": [
"# Run YOLOv3 on COCO val\n",
"!python val.py --weights yolov3.pt --data coco.yaml --img 640 --iou 0.65 --half"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\u001b[34m\u001b[1mval: \u001b[0mdata=/content/yolov3/data/coco.yaml, weights=['yolov3.pt'], batch_size=32, imgsz=640, conf_thres=0.001, iou_thres=0.65, task=val, device=, single_cls=False, augment=False, verbose=False, save_txt=False, save_hybrid=False, save_conf=False, save_json=True, project=runs/val, name=exp, exist_ok=False, half=True, dnn=False\n",
"YOLOv3 🚀 v9.6.0-1-g93a2bcc torch 1.10.0+cu111 CUDA:0 (A100-SXM4-40GB, 40536MiB)\n",
"\n",
"Fusing layers... \n",
"Model Summary: 261 layers, 61922845 parameters, 0 gradients, 156.1 GFLOPs\n",
"\u001b[34m\u001b[1mval: \u001b[0mScanning '../datasets/coco/val2017.cache' images and labels... 4952 found, 48 missing, 0 empty, 0 corrupted: 100% 5000/5000 [00:00<?, ?it/s]\n",
" Class Images Labels P R mAP@.5 mAP@.5:.95: 100% 157/157 [00:45<00:00, 3.45it/s]\n",
" all 5000 36335 0.71 0.602 0.649 0.453\n",
"Speed: 0.1ms pre-process, 2.1ms inference, 1.5ms NMS per image at shape (32, 3, 640, 640)\n",
"\n",
"Evaluating pycocotools mAP... saving runs/val/exp/yolov3_predictions.json...\n",
"loading annotations into memory...\n",
"Done (t=0.44s)\n",
"creating index...\n",
"index created!\n",
"Loading and preparing results...\n",
"DONE (t=4.99s)\n",
"creating index...\n",
"index created!\n",
"Running per image evaluation...\n",
"Evaluate annotation type *bbox*\n",
"DONE (t=91.45s).\n",
"Accumulating evaluation results...\n",
"DONE (t=15.18s).\n",
" Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.464\n",
" Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.655\n",
" Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.507\n",
" Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.298\n",
" Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.515\n",
" Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.585\n",
" Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.359\n",
" Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.594\n",
" Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.649\n",
" Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.476\n",
" Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.698\n",
" Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.779\n",
"Results saved to \u001b[1mruns/val/exp\u001b[0m\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "rc_KbFk0juX2"
},
"source": [
"## COCO test\n",
"Download [COCO test2017](https://github.com/ultralytics/yolov3/blob/master/data/coco.yaml) dataset (7GB - 40,000 images), to test model accuracy on test-dev set (**20,000 images, no labels**). Results are saved to a `*.json` file which should be **zipped** and submitted to the evaluation server at https://competitions.codalab.org/competitions/20794."
]
},
{
"cell_type": "code",
"metadata": {
"id": "V0AJnSeCIHyJ"
},
"source": [
"# Download COCO test-dev2017\n",
"torch.hub.download_url_to_file('https://ultralytics.com/assets/coco2017labels.zip', 'tmp.zip')\n",
"!unzip -q tmp.zip -d ../datasets && rm tmp.zip\n",
"!f=\"test2017.zip\" && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f -d ../datasets/coco/images"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "29GJXAP_lPrt"
},
"source": [
"# Run YOLOv3 on COCO test\n",
"!python val.py --weights yolov3.pt --data coco.yaml --img 640 --iou 0.65 --half --task test"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "ZY2VXXXu74w5"
},
"source": [
"# 3. Train\n",
"\n",
"<p align=\"\"><a href=\"https://roboflow.com/?ref=ultralytics\"><img width=\"1000\" src=\"https://uploads-ssl.webflow.com/5f6bc60e665f54545a1e52a5/615627e5824c9c6195abfda9_computer-vision-cycle.png\"/></a></p>\n",
"Close the active learning loop by sampling images from your inference conditions with the `roboflow` pip package\n",
"<br><br>\n",
"\n",
"Train a YOLOv3 model on the [COCO128](https://www.kaggle.com/ultralytics/coco128) dataset with `--data coco128.yaml`, starting from pretrained `--weights yolov3.pt`, or from randomly initialized `--weights '' --cfg yolov3yaml`.\n",
"\n",
"- **Pretrained [Models](https://github.com/ultralytics/yolov3/tree/master/models)** are downloaded\n",
"automatically from the [latest YOLOv3 release](https://github.com/ultralytics/yolov3/releases)\n",
"- **[Datasets](https://github.com/ultralytics/yolov3/tree/master/data)** available for autodownload include: [COCO](https://github.com/ultralytics/yolov3/blob/master/data/coco.yaml), [COCO128](https://github.com/ultralytics/yolov3/blob/master/data/coco128.yaml), [VOC](https://github.com/ultralytics/yolov3/blob/master/data/VOC.yaml), [Argoverse](https://github.com/ultralytics/yolov3/blob/master/data/Argoverse.yaml), [VisDrone](https://github.com/ultralytics/yolov3/blob/master/data/VisDrone.yaml), [GlobalWheat](https://github.com/ultralytics/yolov3/blob/master/data/GlobalWheat2020.yaml), [xView](https://github.com/ultralytics/yolov3/blob/master/data/xView.yaml), [Objects365](https://github.com/ultralytics/yolov3/blob/master/data/Objects365.yaml), [SKU-110K](https://github.com/ultralytics/yolov3/blob/master/data/SKU-110K.yaml).\n",
"- **Training Results** are saved to `runs/train/` with incrementing run directories, i.e. `runs/train/exp2`, `runs/train/exp3` etc.\n",
"<br><br>\n"
]
},
{
"cell_type": "code",
"metadata": {
"id": "bOy5KI2ncnWd"
},
"source": [
"# Tensorboard (optional)\n",
"%load_ext tensorboard\n",
"%tensorboard --logdir runs/train"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "1NcFxRcFdJ_O",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "c77013e3-347d-42a4-84de-3ca42ea3aee9"
},
"source": [
"# Train YOLOv3 on COCO128 for 3 epochs\n",
"!python train.py --img 640 --batch 16 --epochs 3 --data coco128.yaml --weights yolov3.pt --cache"
],
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\u001b[34m\u001b[1mtrain: \u001b[0mweights=yolov3.pt, cfg=, data=coco128.yaml, hyp=data/hyps/hyp.scratch.yaml, epochs=3, batch_size=16, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, evolve=None, bucket=, cache=ram, image_weights=False, device=, multi_scale=False, single_cls=False, adam=False, sync_bn=False, workers=8, project=runs/train, name=exp, exist_ok=False, quad=False, linear_lr=False, label_smoothing=0.0, patience=100, freeze=0, save_period=-1, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest\n",
"\u001b[34m\u001b[1mgithub: \u001b[0mup to date with https://github.com/ultralytics/yolov3 ✅\n",
"YOLOv3 🚀 v9.6.0-29-ga441ab1 torch 1.13.0+cu116 CUDA:0 (Tesla T4, 15110MiB)\n",
"\n",
"\u001b[34m\u001b[1mhyperparameters: \u001b[0mlr0=0.01, lrf=0.1, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=0.05, cls=0.5, cls_pw=1.0, obj=1.0, obj_pw=1.0, iou_t=0.2, anchor_t=4.0, fl_gamma=0.0, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, flipud=0.0, fliplr=0.5, mosaic=1.0, mixup=0.0, copy_paste=0.0\n",
"\u001b[34m\u001b[1mWeights & Biases: \u001b[0mrun 'pip install wandb' to automatically track and visualize YOLOv3 🚀 runs (RECOMMENDED)\n",
"\u001b[34m\u001b[1mTensorBoard: \u001b[0mStart with 'tensorboard --logdir runs/train', view at http://localhost:6006/\n",
"\n",
"WARNING: Dataset not found, nonexistent paths: ['/content/datasets/coco128/images/train2017']\n",
"Downloading https://ultralytics.com/assets/coco128.zip to coco128.zip...\n",
"100% 6.66M/6.66M [00:00<00:00, 10.2MB/s]\n",
"Dataset autodownload success, saved to ../datasets\n",
"\n",
"\n",
" from n params module arguments \n",
" 0 -1 1 928 models.common.Conv [3, 32, 3, 1] \n",
" 1 -1 1 18560 models.common.Conv [32, 64, 3, 2] \n",
" 2 -1 1 20672 models.common.Bottleneck [64, 64] \n",
" 3 -1 1 73984 models.common.Conv [64, 128, 3, 2] \n",
" 4 -1 2 164608 models.common.Bottleneck [128, 128] \n",
" 5 -1 1 295424 models.common.Conv [128, 256, 3, 2] \n",
" 6 -1 8 2627584 models.common.Bottleneck [256, 256] \n",
" 7 -1 1 1180672 models.common.Conv [256, 512, 3, 2] \n",
" 8 -1 8 10498048 models.common.Bottleneck [512, 512] \n",
" 9 -1 1 4720640 models.common.Conv [512, 1024, 3, 2] \n",
" 10 -1 4 20983808 models.common.Bottleneck [1024, 1024] \n",
" 11 -1 1 5245952 models.common.Bottleneck [1024, 1024, False] \n",
" 12 -1 1 525312 models.common.Conv [1024, 512, 1, 1] \n",
" 13 -1 1 4720640 models.common.Conv [512, 1024, 3, 1] \n",
" 14 -1 1 525312 models.common.Conv [1024, 512, 1, 1] \n",
" 15 -1 1 4720640 models.common.Conv [512, 1024, 3, 1] \n",
" 16 -2 1 131584 models.common.Conv [512, 256, 1, 1] \n",
" 17 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] \n",
" 18 [-1, 8] 1 0 models.common.Concat [1] \n",
" 19 -1 1 1377792 models.common.Bottleneck [768, 512, False] \n",
" 20 -1 1 1312256 models.common.Bottleneck [512, 512, False] \n",
" 21 -1 1 131584 models.common.Conv [512, 256, 1, 1] \n",
" 22 -1 1 1180672 models.common.Conv [256, 512, 3, 1] \n",
" 23 -2 1 33024 models.common.Conv [256, 128, 1, 1] \n",
" 24 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] \n",
" 25 [-1, 6] 1 0 models.common.Concat [1] \n",
" 26 -1 1 344832 models.common.Bottleneck [384, 256, False] \n",
" 27 -1 2 656896 models.common.Bottleneck [256, 256, False] \n",
" 28 [27, 22, 15] 1 457725 models.yolo.Detect [80, [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]], [256, 512, 1024]]\n",
"Model Summary: 333 layers, 61949149 parameters, 61949149 gradients, 156.6 GFLOPs\n",
"\n",
"Transferred 439/439 items from yolov3.pt\n",
"Scaled weight_decay = 0.0005\n",
"\u001b[34m\u001b[1moptimizer:\u001b[0m SGD with parameter groups 72 weight, 75 weight (no decay), 75 bias\n",
"\u001b[34m\u001b[1malbumentations: \u001b[0mBlur(always_apply=False, p=0.01, blur_limit=(3, 7)), MedianBlur(always_apply=False, p=0.01, blur_limit=(3, 7)), ToGray(always_apply=False, p=0.01), CLAHE(always_apply=False, p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))\n",
"\u001b[34m\u001b[1mtrain: \u001b[0mScanning '../datasets/coco128/labels/train2017' images and labels...126 found, 2 missing, 0 empty, 0 corrupted: 100% 128/128 [00:00<00:00, 1542.80it/s]\n",
"\u001b[34m\u001b[1mtrain: \u001b[0mNew cache created: ../datasets/coco128/labels/train2017.cache\n",
"\u001b[34m\u001b[1mtrain: \u001b[0mCaching images (0.1GB ram): 100% 128/128 [00:00<00:00, 327.35it/s]\n",
"\u001b[34m\u001b[1mval: \u001b[0mScanning '../datasets/coco128/labels/train2017.cache' images and labels... 126 found, 2 missing, 0 empty, 0 corrupted: 100% 128/128 [00:00<?, ?it/s]\n",
"\u001b[34m\u001b[1mval: \u001b[0mCaching images (0.1GB ram): 100% 128/128 [00:01<00:00, 124.89it/s]\n",
"Plotting labels to runs/train/exp/labels.jpg... \n",
"\n",
"\u001b[34m\u001b[1mAutoAnchor: \u001b[0m4.27 anchors/target, 0.994 Best Possible Recall (BPR). Current anchors are a good fit to dataset ✅\n",
"Image sizes 640 train, 640 val\n",
"Using 2 dataloader workers\n",
"Logging results to \u001b[1mruns/train/exp\u001b[0m\n",
"Starting training for 3 epochs...\n",
"\n",
" Epoch gpu_mem box obj cls labels img_size\n",
" 0/2 11.9G 0.03646 0.05139 0.01218 170 640: 100% 8/8 [00:11<00:00, 1.41s/it]\n",
" Class Images Labels P R mAP@.5 mAP@.5:.95: 100% 4/4 [00:03<00:00, 1.18it/s]\n",
" all 128 929 0.701 0.771 0.81 0.578\n",
"\n",
" Epoch gpu_mem box obj cls labels img_size\n",
" 1/2 10.7G 0.03638 0.05337 0.01048 204 640: 100% 8/8 [00:06<00:00, 1.28it/s]\n",
" Class Images Labels P R mAP@.5 mAP@.5:.95: 100% 4/4 [00:02<00:00, 1.94it/s]\n",
" all 128 929 0.708 0.774 0.814 0.58\n",
"\n",
" Epoch gpu_mem box obj cls labels img_size\n",
" 2/2 12.1G 0.03671 0.05941 0.01124 281 640: 100% 8/8 [00:06<00:00, 1.26it/s]\n",
" Class Images Labels P R mAP@.5 mAP@.5:.95: 100% 4/4 [00:02<00:00, 1.91it/s]\n",
" all 128 929 0.706 0.776 0.816 0.583\n",
"\n",
"3 epochs completed in 0.012 hours.\n",
"Optimizer stripped from runs/train/exp/weights/last.pt, 124.4MB\n",
"Optimizer stripped from runs/train/exp/weights/best.pt, 124.4MB\n",
"\n",
"Validating runs/train/exp/weights/best.pt...\n",
"Fusing layers... \n",
"Model Summary: 261 layers, 61922845 parameters, 0 gradients, 155.9 GFLOPs\n",
" Class Images Labels P R mAP@.5 mAP@.5:.95: 100% 4/4 [00:04<00:00, 1.11s/it]\n",
" all 128 929 0.733 0.755 0.814 0.583\n",
" person 128 254 0.827 0.803 0.861 0.627\n",
" bicycle 128 6 0.534 0.58 0.586 0.386\n",
" car 128 46 0.847 0.565 0.688 0.331\n",
" motorcycle 128 5 0.776 1 0.995 0.817\n",
" airplane 128 6 0.915 1 0.995 0.837\n",
" bus 128 7 1 0.764 0.978 0.817\n",
" train 128 3 0.844 1 0.995 0.797\n",
" truck 128 12 0.644 0.583 0.649 0.417\n",
" boat 128 6 0.6 0.501 0.695 0.493\n",
" traffic light 128 14 0.854 0.429 0.539 0.276\n",
" stop sign 128 2 0.723 1 0.995 0.796\n",
" bench 128 9 1 0.657 0.796 0.369\n",
" bird 128 16 0.962 1 0.995 0.672\n",
" cat 128 4 0.751 1 0.995 0.933\n",
" dog 128 9 0.773 1 0.955 0.746\n",
" horse 128 2 0.638 1 0.995 0.623\n",
" elephant 128 17 0.995 0.941 0.947 0.782\n",
" bear 128 1 0.604 1 0.995 0.895\n",
" zebra 128 4 0.863 1 0.995 0.946\n",
" giraffe 128 9 0.962 1 0.995 0.822\n",
" backpack 128 6 0.874 0.667 0.714 0.486\n",
" umbrella 128 18 0.792 0.833 0.867 0.564\n",
" handbag 128 19 0.644 0.526 0.544 0.359\n",
" tie 128 7 0.872 0.857 0.858 0.604\n",
" suitcase 128 4 0.61 1 0.995 0.672\n",
" frisbee 128 5 0.717 0.8 0.76 0.61\n",
" skis 128 1 0.672 1 0.995 0.597\n",
" snowboard 128 7 0.852 0.829 0.873 0.632\n",
" sports ball 128 6 0.766 0.833 0.78 0.559\n",
" kite 128 10 0.377 0.6 0.57 0.172\n",
" baseball bat 128 4 0.637 0.888 0.912 0.365\n",
" baseball glove 128 7 0.512 0.571 0.597 0.428\n",
" skateboard 128 5 0.625 0.8 0.803 0.427\n",
" tennis racket 128 7 0.776 0.714 0.718 0.36\n",
" bottle 128 18 0.744 0.889 0.741 0.482\n",
" wine glass 128 16 0.82 0.688 0.888 0.494\n",
" cup 128 36 0.801 0.889 0.884 0.628\n",
" fork 128 6 0.565 0.5 0.596 0.418\n",
" knife 128 16 0.829 0.812 0.822 0.519\n",
" spoon 128 22 0.593 0.5 0.583 0.397\n",
" bowl 128 28 0.869 0.75 0.798 0.635\n",
" banana 128 1 0.694 1 0.995 0.895\n",
" sandwich 128 2 0 0 0.497 0.202\n",
" orange 128 4 1 0.679 0.888 0.691\n",
" broccoli 128 11 0.449 0.364 0.469 0.353\n",
" carrot 128 24 0.727 0.777 0.814 0.526\n",
" hot dog 128 2 0.56 1 0.828 0.828\n",
" pizza 128 5 0.722 1 0.962 0.689\n",
" donut 128 14 0.716 1 0.973 0.875\n",
" cake 128 4 0.714 1 0.995 0.871\n",
" chair 128 35 0.65 0.857 0.856 0.546\n",
" couch 128 6 0.983 0.833 0.899 0.583\n",
" potted plant 128 14 0.832 0.857 0.92 0.567\n",
" bed 128 3 0.474 0.316 0.608 0.487\n",
" dining table 128 13 0.663 0.615 0.666 0.4\n",
" toilet 128 2 0.744 1 0.995 0.896\n",
" tv 128 2 0.685 1 0.995 0.846\n",
" laptop 128 3 1 0 0.913 0.445\n",
" mouse 128 2 0.962 0.5 0.745 0.35\n",
" remote 128 8 0.856 0.625 0.712 0.581\n",
" cell phone 128 8 0.598 0.5 0.637 0.386\n",
" microwave 128 3 0.723 1 0.995 0.852\n",
" oven 128 5 0.556 0.6 0.499 0.379\n",
" sink 128 6 0.549 0.416 0.528 0.31\n",
" refrigerator 128 5 0.6 0.8 0.845 0.587\n",
" book 128 29 0.552 0.276 0.411 0.216\n",
" clock 128 9 0.731 1 0.975 0.798\n",
" vase 128 2 0.588 1 0.995 0.995\n",
" scissors 128 1 1 0 0.332 0.0663\n",
" teddy bear 128 21 0.898 0.838 0.934 0.631\n",
" toothbrush 128 5 0.791 1 0.995 0.754\n",
"Results saved to \u001b[1mruns/train/exp\u001b[0m\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "15glLzbQx5u0"
},
"source": [
"# 4. Visualize"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "-WPvRbS5Swl6"
},
"source": [
"## Local Logging\n",
"\n",
"All results are logged by default to `runs/train`, with a new experiment directory created for each new training as `runs/train/exp2`, `runs/train/exp3`, etc. View train and val jpgs to see mosaics, labels, predictions and augmentation effects. Note an Ultralytics **Mosaic Dataloader** is used for training (shown below), which combines 4 images into 1 mosaic during training.\n",
"\n",
"> <img src=\"https://user-images.githubusercontent.com/26833433/131255960-b536647f-7c61-4f60-bbc5-cb2544d71b2a.jpg\" width=\"700\"> \n",
"`train_batch0.jpg` shows train batch 0 mosaics and labels\n",
"\n",
"> <img src=\"https://user-images.githubusercontent.com/26833433/131256748-603cafc7-55d1-4e58-ab26-83657761aed9.jpg\" width=\"700\"> \n",
"`test_batch0_labels.jpg` shows val batch 0 labels\n",
"\n",
"> <img src=\"https://user-images.githubusercontent.com/26833433/131256752-3f25d7a5-7b0f-4bb3-ab78-46343c3800fe.jpg\" width=\"700\"> \n",
        "`val_batch0_pred.jpg` shows val batch 0 _predictions_\n",
"\n",
        "Training results are automatically logged to [TensorBoard](https://www.tensorflow.org/tensorboard) and [CSV](https://github.com/ultralytics/yolov5/pull/4148) as `results.csv`, which is plotted as `results.png` (below) after training completes. You can also plot any `results.csv` file manually:\n",
"\n",
"```python\n",
"from utils.plots import plot_results \n",
"plot_results('path/to/results.csv') # plot 'results.csv' as 'results.png'\n",
"```\n",
"\n",
"<img align=\"left\" width=\"800\" alt=\"COCO128 Training Results\" src=\"https://user-images.githubusercontent.com/26833433/126906780-8c5e2990-6116-4de6-b78a-367244a33ccf.png\">"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Zelyeqbyt3GD"
},
"source": [
"# Environments\n",
"\n",
"YOLOv3 may be run in any of the following up-to-date verified environments (with all dependencies including [CUDA](https://developer.nvidia.com/cuda)/[CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/) and [PyTorch](https://pytorch.org/) preinstalled):\n",
"\n",
"- **Google Colab and Kaggle** notebooks with free GPU: <a href=\"https://colab.research.google.com/github/ultralytics/yolov3/blob/master/tutorial.ipynb\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"></a> <a href=\"https://www.kaggle.com/ultralytics/yolov3\"><img src=\"https://kaggle.com/static/images/open-in-kaggle.svg\" alt=\"Open In Kaggle\"></a>\n",
"- **Google Cloud** Deep Learning VM. See [GCP Quickstart Guide](https://github.com/ultralytics/yolov3/wiki/GCP-Quickstart)\n",
"- **Amazon** Deep Learning AMI. See [AWS Quickstart Guide](https://github.com/ultralytics/yolov3/wiki/AWS-Quickstart)\n",
"- **Docker Image**. See [Docker Quickstart Guide](https://github.com/ultralytics/yolov3/wiki/Docker-Quickstart) <a href=\"https://hub.docker.com/r/ultralytics/yolov3\"><img src=\"https://img.shields.io/docker/pulls/ultralytics/yolov3?logo=docker\" alt=\"Docker Pulls\"></a>\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "6Qu7Iesl0p54"
},
"source": [
"# Status\n",
"\n",
"![CI CPU testing](https://github.com/ultralytics/yolov3/workflows/CI%20CPU%20testing/badge.svg)\n",
"\n",
        "If this badge is green, all [YOLOv3 GitHub Actions](https://github.com/ultralytics/yolov3/actions) Continuous Integration (CI) tests are currently passing. CI tests verify correct operation of YOLOv3 training ([train.py](https://github.com/ultralytics/yolov3/blob/master/train.py)), validation ([val.py](https://github.com/ultralytics/yolov3/blob/master/val.py)), inference ([detect.py](https://github.com/ultralytics/yolov3/blob/master/detect.py)) and export ([export.py](https://github.com/ultralytics/yolov3/blob/master/export.py)) on macOS, Windows, and Ubuntu every 24 hours and on every commit.\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "IEijrePND_2I"
},
"source": [
"# Appendix\n",
"\n",
"Optional extras below. Unit tests validate repo functionality and should be run on any PRs submitted.\n"
]
},
{
"cell_type": "code",
"metadata": {
"id": "mcKoSIK2WSzj"
},
"source": [
"# Reproduce\n",
"for x in 'yolov3', 'yolov3-spp', 'yolov3-tiny':\n",
" !python val.py --weights {x}.pt --data coco.yaml --img 640 --task speed # speed\n",
" !python val.py --weights {x}.pt --data coco.yaml --img 640 --conf 0.001 --iou 0.65 # mAP"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "GMusP4OAxFu6"
},
"source": [
"# PyTorch Hub\n",
"import torch\n",
"\n",
"# Model\n",
"model = torch.hub.load('ultralytics/yolov3', 'yolov3')\n",
"\n",
"# Images\n",
"dir = 'https://ultralytics.com/images/'\n",
"imgs = [dir + f for f in ('zidane.jpg', 'bus.jpg')] # batch of images\n",
"\n",
"# Inference\n",
"results = model(imgs)\n",
"results.print() # or .show(), .save()"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "FGH0ZjkGjejy"
},
"source": [
"# CI Checks\n",
"%%shell\n",
"export PYTHONPATH=\"$PWD\" # to run *.py. files in subdirectories\n",
"rm -rf runs # remove runs/\n",
"for m in yolov3-tiny; do # models\n",
" python train.py --img 64 --batch 32 --weights $m.pt --epochs 1 --device 0 # train pretrained\n",
" python train.py --img 64 --batch 32 --weights '' --cfg $m.yaml --epochs 1 --device 0 # train scratch\n",
" for d in 0 cpu; do # devices\n",
" python val.py --weights $m.pt --device $d # val official\n",
" python val.py --weights runs/train/exp/weights/best.pt --device $d # val custom\n",
" python detect.py --weights $m.pt --device $d # detect official\n",
" python detect.py --weights runs/train/exp/weights/best.pt --device $d # detect custom\n",
" done\n",
" python hubconf.py # hub\n",
" python models/yolo.py --cfg $m.yaml # build PyTorch model\n",
" python models/tf.py --weights $m.pt # build TensorFlow model\n",
" python export.py --img 64 --batch 1 --weights $m.pt --include torchscript onnx # export\n",
"done"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "gogI-kwi3Tye"
},
"source": [
"# Profile\n",
"from utils.torch_utils import profile\n",
"\n",
"m1 = lambda x: x * torch.sigmoid(x)\n",
"m2 = torch.nn.SiLU()\n",
"results = profile(input=torch.randn(16, 3, 640, 640), ops=[m1, m2], n=100)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "RVRSOhEvUdb5"
},
"source": [
"# Evolve\n",
"!python train.py --img 640 --batch 64 --epochs 100 --data coco128.yaml --weights yolov3.pt --cache --noautoanchor --evolve\n",
"!d=runs/train/evolve && cp evolve.* $d && zip -r evolve.zip $d && gsutil mv evolve.zip gs://bucket # upload results (optional)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "BSgFCAcMbk1R"
},
"source": [
"# VOC\n",
"for b, m in zip([24, 24, 64], ['yolov3', 'yolov3-spp', 'yolov3-tiny']): # zip(batch_size, model)\n",
" !python train.py --batch {b} --weights {m}.pt --data VOC.yaml --epochs 50 --cache --img 512 --nosave --hyp hyp.finetune.yaml --project VOC --name {m}"
],
"execution_count": null,
"outputs": []
}
]
}