YOLOv5 v5.0 release compatibility update for YOLOv3

2021-05-30 18:55:56 +02:00
parent 47ac6833ca
commit 4d0c2e6eee
38 changed files with 1192 additions and 528 deletions
@@ -9,7 +9,7 @@ WANDB_ARTIFACT_PREFIX = 'wandb-artifact://'

 def create_dataset_artifact(opt):
    with open(opt.data) as f:
-        data = yaml.load(f, Loader=yaml.SafeLoader)  # data dict
+        data = yaml.safe_load(f)  # data dict
    logger = WandbLogger(opt, '', None, data, job_type='Dataset Creation')


@@ -17,7 +17,7 @@ if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data.yaml path')
    parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset')
-    parser.add_argument('--project', type=str, default='YOLOv5', help='name of W&B Project')
+    parser.add_argument('--project', type=str, default='YOLOv3', help='name of W&B Project')
    opt = parser.parse_args()
    opt.resume = False  # Explicitly disallow resume check for dataset upload job

@@ -1,3 +1,4 @@
+"""Utilities and tools for tracking runs with Weights & Biases."""
 import json
 import sys
 from pathlib import Path
@@ -9,7 +10,7 @@ from tqdm import tqdm
 sys.path.append(str(Path(__file__).parent.parent.parent))  # add utils/ to path
 from utils.datasets import LoadImagesAndLabels
 from utils.datasets import img2label_paths
-from utils.general import colorstr, xywh2xyxy, check_dataset
+from utils.general import colorstr, xywh2xyxy, check_dataset, check_file

 try:
    import wandb
@@ -35,8 +36,9 @@ def get_run_info(run_path):
    run_path = Path(remove_prefix(run_path, WANDB_ARTIFACT_PREFIX))
    run_id = run_path.stem
    project = run_path.parent.stem
+    entity = run_path.parent.parent.stem
    model_artifact_name = 'run_' + run_id + '_model'
-    return run_id, project, model_artifact_name
+    return entity, project, run_id, model_artifact_name


 def check_wandb_resume(opt):
@@ -44,9 +46,9 @@ def check_wandb_resume(opt):
    if isinstance(opt.resume, str):
        if opt.resume.startswith(WANDB_ARTIFACT_PREFIX):
            if opt.global_rank not in [-1, 0]:  # For resuming DDP runs
-                run_id, project, model_artifact_name = get_run_info(opt.resume)
+                entity, project, run_id, model_artifact_name = get_run_info(opt.resume)
                api = wandb.Api()
-                artifact = api.artifact(project + '/' + model_artifact_name + ':latest')
+                artifact = api.artifact(entity + '/' + project + '/' + model_artifact_name + ':latest')
                modeldir = artifact.download()
                opt.weights = str(Path(modeldir) / "last.pt")
            return True
@@ -54,8 +56,8 @@ def check_wandb_resume(opt):


 def process_wandb_config_ddp_mode(opt):
-    with open(opt.data) as f:
-        data_dict = yaml.load(f, Loader=yaml.SafeLoader)  # data dict
+    with open(check_file(opt.data)) as f:
+        data_dict = yaml.safe_load(f)  # data dict
    train_dir, val_dir = None, None
    if isinstance(data_dict['train'], str) and data_dict['train'].startswith(WANDB_ARTIFACT_PREFIX):
        api = wandb.Api()
@@ -73,11 +75,23 @@ def process_wandb_config_ddp_mode(opt):
    if train_dir or val_dir:
        ddp_data_path = str(Path(val_dir) / 'wandb_local_data.yaml')
        with open(ddp_data_path, 'w') as f:
-            yaml.dump(data_dict, f)
+            yaml.safe_dump(data_dict, f)
        opt.data = ddp_data_path


 class WandbLogger():
+    """Log training runs, datasets, models, and predictions to Weights & Biases.
+
+    This logger sends information to W&B at wandb.ai. By default, this information
+    includes hyperparameters, system configuration and metrics, model metrics,
+    and basic data metrics and analyses.
+
+    By providing additional command line arguments to train.py, datasets,
+    models and predictions can also be logged.
+
+    For more on how this logger is used, see the Weights & Biases documentation:
+    https://docs.wandb.com/guides/integrations/yolov5
+    """
    def __init__(self, opt, name, run_id, data_dict, job_type='Training'):
        # Pre-training routine --
        self.job_type = job_type
@@ -85,16 +99,17 @@ class WandbLogger():
        # It's more elegant to stick to 1 wandb.init call, but useful config data is overwritten in the WandbLogger's wandb.init call
        if isinstance(opt.resume, str):  # checks resume from artifact
            if opt.resume.startswith(WANDB_ARTIFACT_PREFIX):
-                run_id, project, model_artifact_name = get_run_info(opt.resume)
+                entity, project, run_id, model_artifact_name = get_run_info(opt.resume)
                model_artifact_name = WANDB_ARTIFACT_PREFIX + model_artifact_name
                assert wandb, 'install wandb to resume wandb runs'
                # Resume wandb-artifact:// runs here| workaround for not overwriting wandb.config
-                self.wandb_run = wandb.init(id=run_id, project=project, resume='allow')
+                self.wandb_run = wandb.init(id=run_id, project=project, entity=entity, resume='allow')
                opt.resume = model_artifact_name
        elif self.wandb:
            self.wandb_run = wandb.init(config=opt,
                                        resume="allow",
-                                        project='YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem,
+                                        project='YOLOv3' if opt.project == 'runs/train' else Path(opt.project).stem,
+                                        entity=opt.entity,
                                        name=name,
                                        job_type=job_type,
                                        id=run_id) if not wandb.run else wandb.run
@@ -110,17 +125,17 @@ class WandbLogger():
                self.data_dict = self.check_and_upload_dataset(opt)
        else:
            prefix = colorstr('wandb: ')
-            print(f"{prefix}Install Weights & Biases for YOLOv5 logging with 'pip install wandb' (recommended)")
+            print(f"{prefix}Install Weights & Biases for YOLOv3 logging with 'pip install wandb' (recommended)")

    def check_and_upload_dataset(self, opt):
        assert wandb, 'Install wandb to upload dataset'
        check_dataset(self.data_dict)
-        config_path = self.log_dataset_artifact(opt.data,
+        config_path = self.log_dataset_artifact(check_file(opt.data),
                                                opt.single_cls,
-                                                'YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem)
+                                                'YOLOv3' if opt.project == 'runs/train' else Path(opt.project).stem)
        print("Created dataset config file ", config_path)
        with open(config_path) as f:
-            wandb_data_dict = yaml.load(f, Loader=yaml.SafeLoader)
+            wandb_data_dict = yaml.safe_load(f)
        return wandb_data_dict

    def setup_training(self, opt, data_dict):
@@ -158,7 +173,8 @@ class WandbLogger():

    def download_dataset_artifact(self, path, alias):
        if isinstance(path, str) and path.startswith(WANDB_ARTIFACT_PREFIX):
-            dataset_artifact = wandb.use_artifact(remove_prefix(path, WANDB_ARTIFACT_PREFIX) + ":" + alias)
+            artifact_path = Path(remove_prefix(path, WANDB_ARTIFACT_PREFIX) + ":" + alias)
+            dataset_artifact = wandb.use_artifact(artifact_path.as_posix())
            assert dataset_artifact is not None, "'Error: W&B dataset artifact doesn\'t exist'"
            datadir = dataset_artifact.download()
            return datadir, dataset_artifact
@@ -171,8 +187,8 @@ class WandbLogger():
            modeldir = model_artifact.download()
            epochs_trained = model_artifact.metadata.get('epochs_trained')
            total_epochs = model_artifact.metadata.get('total_epochs')
-            assert epochs_trained < total_epochs, 'training to %g epochs is finished, nothing to resume.' % (
-                total_epochs)
+            is_finished = total_epochs is None
+            assert not is_finished, 'training is finished, can only resume incomplete runs.'
            return modeldir, model_artifact
        return None, None

@@ -187,18 +203,18 @@ class WandbLogger():
        })
        model_artifact.add_file(str(path / 'last.pt'), name='last.pt')
        wandb.log_artifact(model_artifact,
-                           aliases=['latest', 'epoch ' + str(self.current_epoch), 'best' if best_model else ''])
+                           aliases=['latest', 'last', 'epoch ' + str(self.current_epoch), 'best' if best_model else ''])
        print("Saving model artifact on epoch ", epoch + 1)

    def log_dataset_artifact(self, data_file, single_cls, project, overwrite_config=False):
        with open(data_file) as f:
-            data = yaml.load(f, Loader=yaml.SafeLoader)  # data dict
+            data = yaml.safe_load(f)  # data dict
        nc, names = (1, ['item']) if single_cls else (int(data['nc']), data['names'])
        names = {k: v for k, v in enumerate(names)}  # to index dictionary
        self.train_artifact = self.create_dataset_table(LoadImagesAndLabels(
-            data['train']), names, name='train') if data.get('train') else None
+            data['train'], rect=True, batch_size=1), names, name='train') if data.get('train') else None
        self.val_artifact = self.create_dataset_table(LoadImagesAndLabels(
-            data['val']), names, name='val') if data.get('val') else None
+            data['val'], rect=True, batch_size=1), names, name='val') if data.get('val') else None
        if data.get('train'):
            data['train'] = WANDB_ARTIFACT_PREFIX + str(Path(project) / 'train')
        if data.get('val'):
@@ -206,7 +222,7 @@ class WandbLogger():
        path = data_file if overwrite_config else '_wandb.'.join(data_file.rsplit('.', 1))  # updated data.yaml path
        data.pop('download', None)
        with open(path, 'w') as f:
-            yaml.dump(data, f)
+            yaml.safe_dump(data, f)

        if self.job_type == 'Training':  # builds correct artifact pipeline graph
            self.wandb_run.use_artifact(self.val_artifact)
@@ -243,16 +259,12 @@ class WandbLogger():
        table = wandb.Table(columns=["id", "train_image", "Classes", "name"])
        class_set = wandb.Classes([{'id': id, 'name': name} for id, name in class_to_id.items()])
        for si, (img, labels, paths, shapes) in enumerate(tqdm(dataset)):
-            height, width = shapes[0]
-            labels[:, 2:] = (xywh2xyxy(labels[:, 2:].view(-1, 4))) * torch.Tensor([width, height, width, height])
            box_data, img_classes = [], {}
-            for cls, *xyxy in labels[:, 1:].tolist():
+            for cls, *xywh in labels[:, 1:].tolist():
                cls = int(cls)
-                box_data.append({"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]},
+                box_data.append({"position": {"middle": [xywh[0], xywh[1]], "width": xywh[2], "height": xywh[3]},
                                 "class_id": cls,
-                                 "box_caption": "%s" % (class_to_id[cls]),
-                                 "scores": {"acc": 1},
-                                 "domain": "pixel"})
+                                 "box_caption": "%s" % (class_to_id[cls])})
                img_classes[cls] = class_to_id[cls]
            boxes = {"ground_truth": {"box_data": box_data, "class_labels": class_to_id}}  # inference-space
            table.add_data(si, wandb.Image(paths, classes=class_set, boxes=boxes), json.dumps(img_classes),
@@ -294,7 +306,7 @@ class WandbLogger():
            if self.result_artifact:
                train_results = wandb.JoinedTable(self.val_table, self.result_table, "id")
                self.result_artifact.add(train_results, 'result')
-                wandb.log_artifact(self.result_artifact, aliases=['latest', 'epoch ' + str(self.current_epoch),
+                wandb.log_artifact(self.result_artifact, aliases=['latest', 'last', 'epoch ' + str(self.current_epoch),
                                                                  ('best' if best_result else '')])
                self.result_table = wandb.Table(["epoch", "id", "prediction", "avg_confidence"])
                self.result_artifact = wandb.Artifact("run_" + wandb.run.id + "_progress", "evaluation")