# greenhouse/train.py

import argparse

import torch.distributed as dist
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torch.utils.tensorboard import SummaryWriter

import test  # import test.py to get mAP after each epoch
from models import *
from utils.datasets import *
from utils.utils import *

# Mixed precision training is optional and relies on NVIDIA Apex. The flag starts out True and is
# cleared when Apex cannot be imported, so the rest of the script can branch on `mixed_precision`.
mixed_precision = True
try:  # Mixed precision training https://github.com/NVIDIA/apex
    from apex import amp
except Exception:  # deliberately not a bare `except:` so Ctrl-C / SystemExit are not swallowed
    # Any import-time failure (package missing or a broken install) falls back to full precision.
    print('Apex recommended for faster mixed precision training: https://github.com/NVIDIA/apex')
    mixed_precision = False  # not installed

# Output locations, all relative to the current working directory
wdir = os.path.join('weights', '')  # weights dir (joining with '' keeps the trailing separator)
last = os.path.join(wdir, 'last.pt')  # most recent checkpoint
best = os.path.join(wdir, 'best.pt')  # checkpoint with the best fitness so far
results_file = 'results.txt'  # per-epoch results log

# Hyperparameters https://github.com/ultralytics/yolov3/issues/310

# Default hyperparameters. Key order is significant: the hyp*.txt override and the evolution code
# in this file both assign values to keys by position.
hyp = dict(
    giou=3.54,  # giou loss gain
    cls=37.4,  # cls loss gain
    cls_pw=1.0,  # cls BCELoss positive_weight
    obj=64.3,  # obj loss gain (*=img_size/320 if img_size != 320)
    obj_pw=1.0,  # obj BCELoss positive_weight
    iou_t=0.1,  # iou training threshold
    lr0=0.01,  # initial learning rate (SGD=5E-3, Adam=5E-4)
    lrf=0.0005,  # final learning rate (with cos scheduler)
    momentum=0.937,  # SGD momentum
    weight_decay=0.000484,  # optimizer weight decay
    fl_gamma=0.0,  # focal loss gamma (efficientDet default is gamma=1.5)
    hsv_h=0.0138,  # image HSV-Hue augmentation (fraction)
    hsv_s=0.678,  # image HSV-Saturation augmentation (fraction)
    hsv_v=0.36,  # image HSV-Value augmentation (fraction)
    # The geometric augmentations below are multiplied by 0, i.e. switched off.
    degrees=0 * 1.98,  # image rotation (+/- deg)
    translate=0 * 0.05,  # image translation (+/- fraction)
    scale=0 * 0.05,  # image scale (+/- gain)
    shear=0 * 0.641,  # image shear (+/- deg)
)

# Optional override: if a hyp*.txt file exists in the working directory, its values replace the
# entries of hyp positionally (first value -> first key, and so on). Only the first match is read.
f = glob.glob('hyp*.txt')
if len(f) > 0:
    hyp_path = f[0]
    print('Using %s' % hyp_path)
    loaded_values = np.loadtxt(hyp_path)
    for name, value in zip(hyp.keys(), loaded_values):
        hyp[name] = value

# Report focal loss only when its gamma is non-zero
fl_gamma = hyp['fl_gamma']
if fl_gamma:
    print('Using FocalLoss(gamma=%g)' % fl_gamma)


def train():
    """Run one full training session and return the most recent evaluation results.

    Configuration comes from module-level globals rather than arguments: ``opt`` (parsed
    command-line options), ``hyp`` (hyperparameters), ``device``, ``mixed_precision`` and
    ``tb_writer``.

    Side effects visible in this function: removes previous ``*_batch*.png`` and results files,
    appends one line per epoch to ``results_file``, writes checkpoints to ``last`` / ``best``,
    optionally copies files to a gsutil bucket, and scales ``hyp['cls']`` in place.

    Returns:
        The 7 result values (P, R, mAP, F1, val GIoU, val Objectness, val Classification) of the
        most recent evaluation, or the initial all-zero tuple if no evaluation ran. Training
        stops and returns early as soon as a non-finite loss is encountered.
    """
    cfg = opt.cfg
    data = opt.data
    epochs = opt.epochs  # 500200 batches at bs 64, 117263 images = 273 epochs
    batch_size = opt.batch_size
    accumulate = opt.accumulate  # effective bs = batch_size * accumulate = 16 * 4 = 64
    weights = opt.weights  # initial training weights
    imgsz_min, imgsz_max, imgsz_test = opt.img_size  # img sizes (min, max, test)

    # Image Sizes
    gs = 64  # (pixels) grid size
    assert math.fmod(imgsz_min, gs) == 0, '--img-size %g must be a %g-multiple' % (imgsz_min, gs)
    opt.multi_scale |= imgsz_min != imgsz_max  # multi if different (min, max)
    if opt.multi_scale:
        if imgsz_min == imgsz_max:
            # Floor division by a float returns a float. Cast back to int so the grid bounds stay
            # integral: random.randrange() rejects float arguments on Python >= 3.12, and the
            # dataset should receive an integer image size.
            imgsz_min = int(imgsz_min // 1.5)
            imgsz_max = int(imgsz_max // 0.667)
        grid_min, grid_max = imgsz_min // gs, imgsz_max // gs
        imgsz_min, imgsz_max = grid_min * gs, grid_max * gs
    img_size = imgsz_max  # initialize with max size

    # Configure run
    init_seeds()
    data_dict = parse_data_cfg(data)
    train_path = data_dict['train']
    test_path = data_dict['valid']
    nc = 1 if opt.single_cls else int(data_dict['classes'])  # number of classes
    hyp['cls'] *= nc / 80  # update coco-tuned hyp['cls'] to current dataset

    # Remove previous results
    for f in glob.glob('*_batch*.png') + glob.glob(results_file):
        os.remove(f)

    # Initialize model
    model = Darknet(cfg).to(device)

    # Optimizer: three parameter groups so weight decay is applied to conv weights only
    pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
    for k, v in dict(model.named_parameters()).items():
        if '.bias' in k:
            pg2 += [v]  # biases
        elif 'Conv2d.weight' in k:
            pg1 += [v]  # apply weight_decay
        else:
            pg0 += [v]  # all else

    if opt.adam:
        # hyp['lr0'] *= 0.1  # reduce lr (i.e. SGD=5E-3, Adam=5E-4)
        optimizer = optim.Adam(pg0, lr=hyp['lr0'])
        # optimizer = AdaBound(pg0, lr=hyp['lr0'], final_lr=0.1)
    else:
        optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
    optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']})  # add pg1 with weight_decay
    optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
    del pg0, pg1, pg2

    start_epoch = 0
    best_fitness = 0.0
    attempt_download(weights)
    if weights.endswith('.pt'):  # pytorch format
        # possible weights are '*.pt', 'yolov3-spp.pt', 'yolov3-tiny.pt' etc.
        chkpt = torch.load(weights, map_location=device)

        # load model (only tensors whose element count matches the current model are kept)
        try:
            chkpt['model'] = {k: v for k, v in chkpt['model'].items() if model.state_dict()[k].numel() == v.numel()}
            model.load_state_dict(chkpt['model'], strict=False)
        except KeyError as e:
            s = "%s is not compatible with %s. Specify --weights '' or specify a --cfg compatible with %s. " \
                "See https://github.com/ultralytics/yolov3/issues/657" % (opt.weights, opt.cfg, opt.weights)
            raise KeyError(s) from e

        # load optimizer
        if chkpt['optimizer'] is not None:
            optimizer.load_state_dict(chkpt['optimizer'])
            best_fitness = chkpt['best_fitness']

        # load results
        if chkpt.get('training_results') is not None:
            with open(results_file, 'w') as file:
                file.write(chkpt['training_results'])  # write results.txt

        start_epoch = chkpt['epoch'] + 1
        del chkpt

    elif len(weights) > 0:  # darknet format
        # possible weights are '*.weights', 'yolov3-tiny.conv.15',  'darknet53.conv.74' etc.
        load_darknet_weights(model, weights)

    # Mixed precision training https://github.com/NVIDIA/apex
    if mixed_precision:
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)

    # Scheduler https://github.com/ultralytics/yolov3/issues/238
    # Cosine decay of the lr multiplier from 1.0 down to 0.05 over `epochs`
    lf = lambda x: (((1 + math.cos(
        x * math.pi / epochs)) / 2) ** 1.0) * 0.95 + 0.05  # cosine https://arxiv.org/pdf/1812.01187.pdf
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf, last_epoch=start_epoch - 1)
    # scheduler = lr_scheduler.MultiStepLR(optimizer, [round(epochs * x) for x in [0.8, 0.9]], 0.1, start_epoch - 1)

    # Plot lr schedule
    # y = []
    # for _ in range(epochs):
    #     scheduler.step()
    #     y.append(optimizer.param_groups[0]['lr'])
    # plt.plot(y, '.-', label='LambdaLR')
    # plt.xlabel('epoch')
    # plt.ylabel('LR')
    # plt.tight_layout()
    # plt.savefig('LR.png', dpi=300)

    # Initialize distributed training
    if device.type != 'cpu' and torch.cuda.device_count() > 1 and torch.distributed.is_available():
        dist.init_process_group(backend='nccl',  # 'distributed backend'
                                init_method='tcp://127.0.0.1:9999',  # distributed training init method
                                world_size=1,  # number of nodes for distributed training
                                rank=0)  # distributed training node rank
        model = torch.nn.parallel.DistributedDataParallel(model, find_unused_parameters=True)
        model.yolo_layers = model.module.yolo_layers  # move yolo layer indices to top level

    # Dataset
    dataset = LoadImagesAndLabels(train_path, img_size, batch_size,
                                  augment=True,
                                  hyp=hyp,  # augmentation hyperparameters
                                  rect=opt.rect,  # rectangular training
                                  cache_images=opt.cache_images,
                                  single_cls=opt.single_cls)

    # Dataloader
    batch_size = min(batch_size, len(dataset))
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             num_workers=nw,
                                             shuffle=not opt.rect,  # Shuffle=True unless rectangular training is used
                                             pin_memory=True,
                                             collate_fn=dataset.collate_fn)

    # Testloader
    testloader = torch.utils.data.DataLoader(LoadImagesAndLabels(test_path, imgsz_test, batch_size,
                                                                 hyp=hyp,
                                                                 rect=True,
                                                                 cache_images=opt.cache_images,
                                                                 single_cls=opt.single_cls),
                                             batch_size=batch_size,
                                             num_workers=nw,
                                             pin_memory=True,
                                             collate_fn=dataset.collate_fn)

    # Model parameters
    model.nc = nc  # attach number of classes to model
    model.hyp = hyp  # attach hyperparameters to model
    model.gr = 1.0  # giou loss ratio (obj_loss = 1.0 or giou)
    model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device)  # attach class weights

    # Model EMA
    ema = torch_utils.ModelEMA(model)

    # Start training
    nb = len(dataloader)  # number of batches
    n_burn = max(3 * nb, 500)  # burn-in iterations, max(3 epochs, 500 iterations)
    maps = np.zeros(nc)  # mAP per class
    # torch.autograd.set_detect_anomaly(True)
    results = (0, 0, 0, 0, 0, 0, 0)  # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'
    t0 = time.time()
    print('Image sizes %g - %g train, %g test' % (imgsz_min, imgsz_max, imgsz_test))
    print('Using %g dataloader workers' % nw)
    print('Starting training for %g epochs...' % epochs)
    for epoch in range(start_epoch, epochs):  # epoch ------------------------------------------------------------------
        model.train()

        # Update image weights (optional)
        if dataset.image_weights:
            w = model.class_weights.cpu().numpy() * (1 - maps) ** 2  # class weights
            image_weights = labels_to_image_weights(dataset.labels, nc=nc, class_weights=w)
            dataset.indices = random.choices(range(dataset.n), weights=image_weights, k=dataset.n)  # rand weighted idx

        mloss = torch.zeros(4).to(device)  # mean losses
        print(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size'))
        pbar = tqdm(enumerate(dataloader), total=nb)  # progress bar
        for i, (imgs, targets, paths, _) in pbar:  # batch -------------------------------------------------------------
            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0
            targets = targets.to(device)

            # Burn-in
            if ni <= n_burn * 2:
                model.gr = np.interp(ni, [0, n_burn * 2], [0.0, 1.0])  # giou loss ratio (obj_loss = 1.0 or giou)
                if ni == n_burn:  # burnin complete
                    print_model_biases(model)

                for j, x in enumerate(optimizer.param_groups):
                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                    x['lr'] = np.interp(ni, [0, n_burn], [0.1 if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
                    if 'momentum' in x:
                        x['momentum'] = np.interp(ni, [0, n_burn], [0.9, hyp['momentum']])

            # Multi-Scale training
            if opt.multi_scale:
                if ni / accumulate % 1 == 0:  # pick a new img_size every `accumulate` batches
                    img_size = random.randrange(grid_min, grid_max + 1) * gs
                sf = img_size / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (stretched to gs-multiple)
                    imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)

            # Run model
            pred = model(imgs)

            # Compute loss
            loss, loss_items = compute_loss(pred, targets, model)
            if not torch.isfinite(loss):
                print('WARNING: non-finite loss, ending training ', loss_items)
                return results

            # Scale loss by nominal batch_size of 64
            loss *= batch_size / 64

            # Compute gradient
            if mixed_precision:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            # Optimize accumulated gradient
            if ni % accumulate == 0:
                optimizer.step()
                optimizer.zero_grad()
                ema.update(model)

            # Print batch results
            mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
            # NOTE(review): torch.cuda.memory_cached() is deprecated in recent PyTorch releases in favour of
            # memory_reserved(); confirm the minimum supported PyTorch version before switching.
            mem = '%.3gG' % (torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0)  # (GB)
            s = ('%10s' * 2 + '%10.3g' * 6) % ('%g/%g' % (epoch, epochs - 1), mem, *mloss, len(targets), img_size)
            pbar.set_description(s)

            # Plot images with bounding boxes (first batch of the run only)
            if ni < 1:
                f = 'train_batch%g.png' % i  # filename
                plot_images(imgs=imgs, targets=targets, paths=paths, fname=f)
                if tb_writer:
                    tb_writer.add_image(f, cv2.imread(f)[:, :, ::-1], dataformats='HWC')
                    # tb_writer.add_graph(model, imgs)  # add model to tensorboard

            # end batch ------------------------------------------------------------------------------------------------

        # Update scheduler
        scheduler.step()

        # Process epoch results
        ema.update_attr(model)
        final_epoch = epoch + 1 == epochs
        if not opt.notest or final_epoch:  # Calculate mAP
            is_coco = any([x in data for x in ['coco.data', 'coco2014.data', 'coco2017.data']]) and model.nc == 80
            results, maps = test.test(cfg,
                                      data,
                                      batch_size=batch_size,
                                      img_size=imgsz_test,
                                      model=ema.ema,
                                      save_json=final_epoch and is_coco,
                                      single_cls=opt.single_cls,
                                      dataloader=testloader)

        # Write epoch results (`s` is the progress-bar line of the last batch)
        with open(results_file, 'a') as f:
            f.write(s + '%10.3g' * 7 % results + '\n')  # P, R, mAP, F1, test_losses=(GIoU, obj, cls)
        if len(opt.name) and opt.bucket:
            os.system('gsutil cp results.txt gs://%s/results/results%s.txt' % (opt.bucket, opt.name))

        # Write Tensorboard results
        if tb_writer:
            tags = ['train/giou_loss', 'train/obj_loss', 'train/cls_loss',
                    'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/F1',
                    'val/giou_loss', 'val/obj_loss', 'val/cls_loss']
            for x, tag in zip(list(mloss[:-1]) + list(results), tags):
                tb_writer.add_scalar(tag, x, epoch)

        # Update best mAP
        fi = fitness(np.array(results).reshape(1, -1))  # fitness_i = weighted combination of [P, R, mAP, F1]
        if fi > best_fitness:
            best_fitness = fi

        # Save training results
        save = (not opt.nosave) or (final_epoch and not opt.evolve)
        if save:
            with open(results_file, 'r') as f:
                # Create checkpoint (the optimizer state is dropped on the final epoch)
                chkpt = {'epoch': epoch,
                         'best_fitness': best_fitness,
                         'training_results': f.read(),
                         'model': ema.ema.module.state_dict() if hasattr(model, 'module') else ema.ema.state_dict(),
                         'optimizer': None if final_epoch else optimizer.state_dict()}

            # Save last checkpoint
            torch.save(chkpt, last)

            # Save best checkpoint
            if (best_fitness == fi) and not final_epoch:
                torch.save(chkpt, best)

            # Save backup every 10 epochs (optional)
            # if epoch > 0 and epoch % 10 == 0:
            #     torch.save(chkpt, wdir + 'backup%g.pt' % epoch)

            # Delete checkpoint
            del chkpt

        # end epoch ----------------------------------------------------------------------------------------------------

    # end training
    n = opt.name
    if len(n):
        n = '_' + n if not n.isnumeric() else n
        fresults, flast, fbest = 'results%s.txt' % n, wdir + 'last%s.pt' % n, wdir + 'best%s.pt' % n
        for f1, f2 in zip([wdir + 'last.pt', wdir + 'best.pt', 'results.txt'], [flast, fbest, fresults]):
            if os.path.exists(f1):
                os.rename(f1, f2)  # rename
                ispt = f2.endswith('.pt')  # is *.pt
                strip_optimizer(f2) if ispt else None  # strip optimizer
                os.system('gsutil cp %s gs://%s/weights' % (f2, opt.bucket)) if opt.bucket and ispt else None  # upload

    if not opt.evolve:
        plot_results()  # save as results.png
    # Derive the count from the loop bounds: `epoch` is unbound when the loop body never ran
    # (e.g. resuming a run whose checkpoint is already at the last epoch).
    print('%g epochs completed in %.3f hours.\n' % (max(epochs - start_epoch, 0), (time.time() - t0) / 3600))
    # Tear down only a process group that was actually created above; GPU count alone does not
    # imply that init_process_group() ran (e.g. --device cpu on a multi-GPU machine).
    if dist.is_available() and dist.is_initialized():
        dist.destroy_process_group()
    torch.cuda.empty_cache()

    return results


if __name__ == '__main__':
    # Command-line entry point: parse options, select the device, then either train once or run
    # one generation of hyperparameter evolution. `opt`, `device`, `mixed_precision` and
    # `tb_writer` are module-level globals read by train().
    parser = argparse.ArgumentParser()
    parser.add_argument('--epochs', type=int, default=300)  # 500200 batches at bs 16, 117263 COCO images = 273 epochs
    parser.add_argument('--batch-size', type=int, default=16)  # effective bs = batch_size * accumulate = 16 * 4 = 64
    parser.add_argument('--accumulate', type=int, default=4, help='batches to accumulate before optimizing')
    parser.add_argument('--cfg', type=str, default='cfg/yolov3-spp.cfg', help='*.cfg path')
    parser.add_argument('--data', type=str, default='data/coco2017.data', help='*.data path')
    parser.add_argument('--multi-scale', action='store_true', help='adjust (67%% - 150%%) img_size every 10 batches')
    parser.add_argument('--img-size', nargs='+', type=int, default=[512], help='[min_train, max-train, test] img sizes')
    parser.add_argument('--rect', action='store_true', help='rectangular training')
    parser.add_argument('--resume', action='store_true', help='resume training from last.pt')
    parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
    parser.add_argument('--notest', action='store_true', help='only test final epoch')
    parser.add_argument('--evolve', action='store_true', help='evolve hyperparameters')
    parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
    parser.add_argument('--cache-images', action='store_true', help='cache images for faster training')
    parser.add_argument('--weights', type=str, default='weights/yolov3-spp-ultralytics.pt', help='initial weights path')
    parser.add_argument('--name', default='', help='renames results.txt to results_name.txt if supplied')
    parser.add_argument('--device', default='', help='device id (i.e. 0 or 0,1 or cpu)')
    parser.add_argument('--adam', action='store_true', help='use adam optimizer')
    parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset')
    opt = parser.parse_args()
    opt.weights = last if opt.resume else opt.weights  # --resume overrides --weights with last.pt
    print(opt)
    opt.img_size.extend([opt.img_size[-1]] * (3 - len(opt.img_size)))  # extend to 3 sizes (min, max, test)
    device = torch_utils.select_device(opt.device, apex=mixed_precision, batch_size=opt.batch_size)
    if device.type == 'cpu':
        mixed_precision = False

    # scale hyp['obj'] by img_size (evolved at 320)
    # hyp['obj'] *= opt.img_size[0] / 320.

    tb_writer = None
    if not opt.evolve:  # Train normally
        print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/')
        tb_writer = SummaryWriter()
        try:
            train()  # train normally
        finally:
            tb_writer.close()  # release the writer even if training raises

    else:  # Evolve hyperparameters (optional)
        opt.notest, opt.nosave = True, True  # only test/save final epoch
        if opt.bucket:
            os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket)  # download evolve.txt if exists

        for _ in range(1):  # generations to evolve
            if os.path.exists('evolve.txt'):  # if evolve.txt exists: select best hyps and mutate
                # Select parent(s)
                parent = 'single'  # parent selection method: 'single' or 'weighted'
                x = np.loadtxt('evolve.txt', ndmin=2)
                n = min(5, len(x))  # number of previous results to consider
                x = x[np.argsort(-fitness(x))][:n]  # top n mutations
                # The epsilon keeps the weight sum strictly positive. Without it a single row (or rows
                # with identical fitness) gives all-zero weights, which random.choices() rejects and
                # which would divide by zero in the 'weighted' branch.
                w = fitness(x) - fitness(x).min() + 1e-6  # weights (sum > 0)
                if parent == 'single' or len(x) == 1:
                    # x = x[random.randint(0, n - 1)]  # random selection
                    x = x[random.choices(range(n), weights=w)[0]]  # weighted selection
                elif parent == 'weighted':
                    x = (x * w.reshape(n, 1)).sum(0) / w.sum()  # weighted combination

                # Mutate
                method, mp, s = 3, 0.9, 0.2  # method, mutation probability, sigma
                npr = np.random
                npr.seed(int(time.time()))
                g = np.array([1, 1, 1, 1, 1, 1, 1, 0, .1, 1, 0, 1, 1, 1, 1, 1, 1, 1])  # gains (one per hyp key, in order)
                ng = len(g)
                if method == 1:
                    v = (npr.randn(ng) * npr.random() * g * s + 1) ** 2.0
                elif method == 2:
                    v = (npr.randn(ng) * npr.random(ng) * g * s + 1) ** 2.0
                elif method == 3:
                    v = np.ones(ng)
                    while all(v == 1):  # mutate until a change occurs (prevent duplicates)
                        # v = (g * (npr.random(ng) < mp) * npr.randn(ng) * s + 1) ** 2.0
                        v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0)
                for i, k in enumerate(hyp.keys()):  # plt.hist(v.ravel(), 300)
                    hyp[k] = x[i + 7] * v[i]  # mutate (hyperparameter values start at column 7 of each row)

            # Clip to limits
            keys = ['lr0', 'iou_t', 'momentum', 'weight_decay', 'hsv_s', 'hsv_v', 'translate', 'scale', 'fl_gamma']
            limits = [(1e-5, 1e-2), (0.00, 0.70), (0.60, 0.98), (0, 0.001), (0, .9), (0, .9), (0, .9), (0, .9), (0, 3)]
            for k, v in zip(keys, limits):
                hyp[k] = np.clip(hyp[k], v[0], v[1])

            # Train mutation
            results = train()

            # Write mutation results
            print_mutation(hyp, results, opt.bucket)

            # Plot results
            # plot_evolution_results(hyp)
-												Initial commit

											
										
										
											2018-08-26 10:51:39 +02:00
+								import argparse
-												updates

											
										
										
											2019-07-25 13:19:26 +02:00
+								import torch.distributed as dist
-												updates

											
										
										
											2019-04-17 15:52:51 +02:00
+								import torch.optim as optim
-												updates

											
										
										
											2019-05-30 19:02:55 +02:00
+								import torch.optim.lr_scheduler as lr_scheduler
-												Tensorboard out of try, iou_t to 0.10

											
										
										
											2020-04-20 09:57:15 -07:00
+								from torch.utils.tensorboard import SummaryWriter
-												multi_thread dataloader

											
										
										
											2019-03-21 14:48:40 +02:00
-												updates

											
										
										
											2019-06-24 13:43:17 +02:00
+								import test  # import test.py to get mAP after each epoch
-												Initial commit

											
										
										
											2018-08-26 10:51:39 +02:00
+								from models import *
 								from utils.datasets import *
 								from utils.utils import *
-												updates

											
										
										
											2019-07-24 18:02:26 +02:00
+								mixed_precision = True
 								try:  # Mixed precision training https://github.com/NVIDIA/apex
 								    from apex import amp
-												updates

											
										
										
											2019-08-01 18:29:57 +02:00
+								except:
-												Tensorboard out of try, iou_t to 0.10

											
										
										
											2020-04-20 09:57:15 -07:00
+								    print('Apex recommended for faster mixed precision training: https://github.com/NVIDIA/apex')
-												updates

											
										
										
											2019-08-01 18:29:57 +02:00
+								    mixed_precision = False  # not installed
-												updates

											
										
										
											2019-07-24 18:02:26 +02:00
-												updates

											
										
										
											2019-09-18 00:38:49 +02:00
+								wdir = 'weights' + os.sep  # weights dir
 								last = wdir + 'last.pt'
 								best = wdir + 'best.pt'
-												updates

											
										
										
											2019-09-18 00:54:07 +02:00
+								results_file = 'results.txt'
-												updates

											
										
										
											2019-09-18 00:38:49 +02:00
-												LR schedule to 0.05 min

											
										
										
											2020-03-29 13:29:06 -07:00
+								# Hyperparameters https://github.com/ultralytics/yolov3/issues/310
-												updates

											
										
										
											2019-12-07 00:01:18 -08:00
-												updates

											
										
										
											2019-12-06 23:58:47 -08:00
+								hyp = {'giou': 3.54,  # giou loss gain
 								       'cls': 37.4,  # cls loss gain
-												updates

											
										
										
											2019-10-25 11:03:04 -05:00
+								       'cls_pw': 1.0,  # cls BCELoss positive_weight
-												updates

											
										
										
											2020-02-16 23:13:34 -08:00
+								       'obj': 64.3,  # obj loss gain (*=img_size/320 if img_size != 320)
-												updates

											
										
										
											2019-10-25 11:03:04 -05:00
+								       'obj_pw': 1.0,  # obj BCELoss positive_weight
-												Tensorboard out of try, iou_t to 0.10

											
										
										
											2020-04-20 09:57:15 -07:00
+								       'iou_t': 0.1,  # iou training threshold
-												updates

											
										
										
											2020-03-04 13:06:31 -08:00
+								       'lr0': 0.01,  # initial learning rate (SGD=5E-3, Adam=5E-4)
-												LR schedule to 0.05 min

											
										
										
											2020-03-29 13:29:06 -07:00
+								       'lrf': 0.0005,  # final learning rate (with cos scheduler)
-												updates

											
										
										
											2019-12-06 23:58:47 -08:00
+								       'momentum': 0.937,  # SGD momentum
 								       'weight_decay': 0.000484,  # optimizer weight decay
-												Remove deprecated --arc architecture options, implement --arc default for all cases

											
										
										
											2020-03-16 20:46:25 -07:00
+								       'fl_gamma': 0.0,  # focal loss gamma (efficientDet default is gamma=1.5)
-												updates

											
										
										
											2019-12-06 23:58:47 -08:00
+								       'hsv_h': 0.0138,  # image HSV-Hue augmentation (fraction)
 								       'hsv_s': 0.678,  # image HSV-Saturation augmentation (fraction)
 								       'hsv_v': 0.36,  # image HSV-Value augmentation (fraction)
-												updates

											
										
										
											2020-03-10 12:17:23 -07:00
+								       'degrees': 1.98 * 0,  # image rotation (+/- deg)
 								       'translate': 0.05 * 0,  # image translation (+/- fraction)
 								       'scale': 0.05 * 0,  # image scale (+/- gain)
 								       'shear': 0.641 * 0}  # image shear (+/- deg)
-												updates

											
										
										
											2019-08-18 02:08:47 +02:00
-												updates

											
										
										
											2019-09-11 14:00:57 +02:00
+								# Overwrite hyp with hyp*.txt (optional)
 								f = glob.glob('hyp*.txt')
 								if f:
-												updates

											
										
										
											2019-12-01 13:51:55 -08:00
+								    print('Using %s' % f[0])
-												updates

											
										
										
											2019-09-11 14:00:57 +02:00
+								    for k, v in zip(hyp.keys(), np.loadtxt(f[0])):
 								        hyp[k] = v
-												updates

											
										
										
											2019-09-11 13:15:16 +02:00
-												tensorboard/focal loss reporting update

											
										
										
											2020-04-06 15:45:18 -07:00
+								# Print focal loss if gamma > 0
 								if hyp['fl_gamma']:
 								    print('Using FocalLoss(gamma=%g)' % hyp['fl_gamma'])
-												updates

											
										
										
											2019-08-18 13:05:32 +02:00
-												updates

											
										
										
											2019-08-23 13:25:27 +02:00
+								def train():
 								    cfg = opt.cfg
 								    data = opt.data
-												updates

											
										
										
											2020-01-10 16:09:36 -08:00
+								    epochs = opt.epochs  # 500200 batches at bs 64, 117263 images = 273 epochs
-												updates

											
										
										
											2019-08-23 13:25:27 +02:00
+								    batch_size = opt.batch_size
 								    accumulate = opt.accumulate  # effective bs = batch_size * accumulate = 16 * 4 = 64
-												updates

											
										
										
											2019-08-23 15:17:17 +02:00
+								    weights = opt.weights  # initial training weights
-												detailed image sizes report

											
										
										
											2020-04-14 11:51:19 -07:00
+								    imgsz_min, imgsz_max, imgsz_test = opt.img_size  # img sizes (min, max, test)
-												updates

											
										
										
											2019-08-23 13:25:27 +02:00
-												multi-scale update

											
										
										
											2020-04-12 18:22:54 -07:00
+								    # Image Sizes
 								    gs = 64  # (pixels) grid size
 								    assert math.fmod(imgsz_min, gs) == 0, '--img-size %g must be a %g-multiple' % (imgsz_min, gs)
 								    opt.multi_scale |= imgsz_min != imgsz_max  # multi if different (min, max)
-												updates

											
										
										
											2019-11-27 15:50:00 -10:00
+								    if opt.multi_scale:
-												multi-scale update

											
										
										
											2020-04-12 18:22:54 -07:00
+								        if imgsz_min == imgsz_max:
 								            imgsz_min //= 1.5
 								            imgsz_max //= 0.667
 								        grid_min, grid_max = imgsz_min // gs, imgsz_max // gs
-												detailed image sizes report

											
										
										
											2020-04-14 11:51:19 -07:00
+								        imgsz_min, imgsz_max = grid_min * gs, grid_max * gs
 								    img_size = imgsz_max  # initialize with max size
-												Initial commit

											
										
										
											2018-08-26 10:51:39 +02:00
 								    # Configure run
-												multi-scale update

											
										
										
											2020-04-12 18:22:54 -07:00
+								    init_seeds()
-												updates

											
										
										
											2019-07-20 15:10:31 +02:00
+								    data_dict = parse_data_cfg(data)
-												updates

											
										
										
											2019-04-27 17:57:07 +02:00
+								    train_path = data_dict['train']
-												updates

											
										
										
											2019-12-04 23:02:32 -08:00
+								    test_path = data_dict['valid']
-												updates

											
										
										
											2020-01-17 17:52:28 -08:00
+								    nc = 1 if opt.single_cls else int(data_dict['classes'])  # number of classes
-												update coco-tuned hyp['cls'] to current dataset

											
										
										
											2020-04-06 10:58:07 -07:00
+								    hyp['cls'] *= nc / 80  # update coco-tuned hyp['cls'] to current dataset
-												Initial commit

											
										
										
											2018-08-26 10:51:39 +02:00
-												updates

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>

											
										
										
											2019-09-04 09:20:03 +02:00
+								    # Remove previous results
-												change of test batch image format from .jpg to .png, due to matplotlib bug (#817)

Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>

											
										
										
											2020-01-31 00:48:26 +01:00
+								    for f in glob.glob('*_batch*.png') + glob.glob(results_file):
-												updates

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>

											
										
										
											2019-09-04 09:20:03 +02:00
+								        os.remove(f)
-												Initial commit

											
										
										
											2018-08-26 10:51:39 +02:00
+								    # Initialize model
-												Remove deprecated --arc architecture options, implement --arc default for all cases

											
										
										
											2020-03-16 20:46:25 -07:00
+								    model = Darknet(cfg).to(device)
-												Initial commit

											
										
										
											2018-08-26 10:51:39 +02:00
-												Merge branch 'master' of /Users/glennjocher/PycharmProjects/yolov3 with conflicts.

											
										
										
											2019-03-21 12:08:55 +02:00
+								    # Optimizer
-												updates

											
										
										
											2020-01-17 10:55:30 -08:00
+								    pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
-												weight_decay fix

											
										
										
											2019-08-26 14:47:36 +02:00
+								    for k, v in dict(model.named_parameters()).items():
-												updates

											
										
										
											2020-01-17 10:55:30 -08:00
+								        if '.bias' in k:
 								            pg2 += [v]  # biases
 								        elif 'Conv2d.weight' in k:
 								            pg1 += [v]  # apply weight_decay
-												weight_decay fix

											
										
										
											2019-08-26 14:47:36 +02:00
+								        else:
-												updates

											
										
										
											2020-01-17 10:55:30 -08:00
+								            pg0 += [v]  # all else
-												weight_decay fix

											
										
										
											2019-08-26 14:47:36 +02:00
-												updates

											
										
										
											2019-09-11 14:25:48 +02:00
+								    if opt.adam:
-												updates

											
										
										
											2020-01-19 16:55:29 -08:00
+								        # hyp['lr0'] *= 0.1  # reduce lr (i.e. SGD=5E-3, Adam=5E-4)
-												updates

											
										
										
											2019-09-11 14:25:48 +02:00
+								        optimizer = optim.Adam(pg0, lr=hyp['lr0'])
 								        # optimizer = AdaBound(pg0, lr=hyp['lr0'], final_lr=0.1)
 								    else:
 								        optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
-												weight_decay fix

											
										
										
											2019-08-26 14:47:36 +02:00
+								    optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']})  # add pg1 with weight_decay
-												updates

											
										
										
											2020-01-17 11:17:52 -08:00
+								    optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
-												updates

											
										
										
											2020-01-17 10:55:30 -08:00
+								    del pg0, pg1, pg2
-												Initial commit

											
										
										
											2018-08-26 10:51:39 +02:00
-												updates

											
										
										
											2019-02-22 16:15:20 +01:00
+								    start_epoch = 0
-												updates

											
										
										
											2020-01-29 10:30:13 -08:00
+								    best_fitness = 0.0
-												updates

											
										
										
											2019-09-19 18:05:04 +02:00
+								    attempt_download(weights)
-												updates

											
										
										
											2019-08-23 15:17:17 +02:00
+								    if weights.endswith('.pt'):  # pytorch format
-												updates

											
										
										
											2019-11-14 17:22:09 -08:00
+								        # possible weights are '*.pt', 'yolov3-spp.pt', 'yolov3-tiny.pt' etc.
-												updates

											
										
										
											2019-08-23 15:17:17 +02:00
+								        chkpt = torch.load(weights, map_location=device)
-												updates

											
										
										
											2019-04-02 18:04:04 +02:00
-												updates

											
										
										
											2019-08-23 15:17:17 +02:00
+								        # load model
-												updates

											
										
										
											2019-11-25 11:45:28 -10:00
+								        try:
 								            chkpt['model'] = {k: v for k, v in chkpt['model'].items() if model.state_dict()[k].numel() == v.numel()}
 								            model.load_state_dict(chkpt['model'], strict=False)
 								        except KeyError as e:
 								            s = "%s is not compatible with %s. Specify --weights '' or specify a --cfg compatible with %s. " \
 								                "See https://github.com/ultralytics/yolov3/issues/657" % (opt.weights, opt.cfg, opt.weights)
 								            raise KeyError(s) from e
-												updates

											
										
										
											2019-04-02 18:04:04 +02:00
-												updates

											
										
										
											2019-08-23 15:17:17 +02:00
+								        # load optimizer
-												updates

											
										
										
											2019-04-02 18:04:04 +02:00
+								        if chkpt['optimizer'] is not None:
 								            optimizer.load_state_dict(chkpt['optimizer'])
-												updates

											
										
										
											2019-07-02 18:21:28 +02:00
+								            best_fitness = chkpt['best_fitness']
-												updates

											
										
										
											2019-07-08 18:00:19 +02:00
-												updates

											
										
										
											2019-08-23 15:17:17 +02:00
+								        # load results
-												prevent failure when no training_results available (#409)

Use `chkpt.get('training_results')` instead of `chkpt['training_results']` so that if the dict doesn't contain this key it won't throw a `KeyError`.
											
										
										
											2019-07-31 15:12:27 +03:00
+								        if chkpt.get('training_results') is not None:
-												updates

											
										
										
											2019-09-18 02:25:09 +02:00
+								            with open(results_file, 'w') as file:
-												updates

											
										
										
											2019-07-08 19:26:46 +02:00
+								                file.write(chkpt['training_results'])  # write results.txt
-												updates

											
										
										
											2019-07-08 18:00:19 +02:00
 								        start_epoch = chkpt['epoch'] + 1
-												updates

											
										
										
											2019-04-02 18:04:04 +02:00
+								        del chkpt
-												initialize from darknet53

											
										
										
											2018-10-30 15:18:52 +01:00
-												updates

											
										
										
											2019-08-23 15:37:25 +02:00
+								    elif len(weights) > 0:  # darknet format
-												updates

											
										
										
											2019-11-14 17:22:09 -08:00
+								        # possible weights are '*.weights', 'yolov3-tiny.conv.15',  'darknet53.conv.74' etc.
-												updates

											
										
										
											2020-01-17 10:55:30 -08:00
+								        load_darknet_weights(model, weights)
-												initialize from darknet53

											
										
										
											2018-10-30 15:18:52 +01:00
-												updates

											
										
										
											2020-02-27 13:40:14 -08:00
+								    # Mixed precision training https://github.com/NVIDIA/apex
 								    if mixed_precision:
 								        model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)
-												updates

											
										
										
											2019-04-24 12:58:14 +02:00
+								    # Scheduler https://github.com/ultralytics/yolov3/issues/238
-												burnin merged with prebias

											
										
										
											2020-04-02 14:08:21 -07:00
+								    lf = lambda x: (((1 + math.cos(
 								        x * math.pi / epochs)) / 2) ** 1.0) * 0.95 + 0.05  # cosine https://arxiv.org/pdf/1812.01187.pdf
-												updates

											
										
										
											2020-03-11 12:18:03 -07:00
+								    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf, last_epoch=start_epoch - 1)
 								    # scheduler = lr_scheduler.MultiStepLR(optimizer, [round(epochs * x) for x in [0.8, 0.9]], 0.1, start_epoch - 1)
-												updates

											
										
										
											2019-04-18 21:56:50 +02:00
-												updates

											
										
										
											2020-03-11 12:18:03 -07:00
+								    # Plot lr schedule
-												updates

											
										
										
											2019-04-18 21:44:57 +02:00
+								    # y = []
 								    # for _ in range(epochs):
 								    #     scheduler.step()
 								    #     y.append(optimizer.param_groups[0]['lr'])
-												updates

											
										
										
											2020-02-22 21:24:56 -08:00
+								    # plt.plot(y, '.-', label='LambdaLR')
-												updates

											
										
										
											2019-04-24 12:58:14 +02:00
+								    # plt.xlabel('epoch')
-												updates

											
										
										
											2019-06-21 13:19:23 +02:00
+								    # plt.ylabel('LR')
-												updates

											
										
										
											2019-04-24 12:58:14 +02:00
+								    # plt.tight_layout()
 								    # plt.savefig('LR.png', dpi=300)
-												updates

											
										
										
											2019-04-17 16:15:08 +02:00
-												updates

											
										
										
											2019-07-24 18:02:26 +02:00
+								    # Initialize distributed training
-												updates

											
										
										
											2020-03-01 21:33:16 -08:00
+								    if device.type != 'cpu' and torch.cuda.device_count() > 1 and torch.distributed.is_available():
-												updates

											
										
										
											2019-07-24 18:02:26 +02:00
+								        dist.init_process_group(backend='nccl',  # 'distributed backend'
 								                                init_method='tcp://127.0.0.1:9999',  # distributed training init method
 								                                world_size=1,  # number of nodes for distributed training
 								                                rank=0)  # distributed training node rank
-												Update train.py for distributive programming (#655)

When attempting to run this function in a multi-GPU environment I kept getting a runtime error. I was able to solve this problem by passing the `find_unused_parameters=True` keyword to `DistributedDataParallel`. I first found the solution here: 
https://github.com/pytorch/pytorch/issues/22436
and in the pytorch tutorial

'RuntimeError: Expected to have finished reduction in the prior iteration before starting a new one. This error indicates that your module has parameters that were not used in producing loss. You can enable unused parameter detection by (1) passing the keyword argument find_unused_parameters=True to torch.nn.parallel.DistributedDataParallel; (2) making sure all forward function outputs participate in calculating loss. If you already have done the above two steps, then the distributed data parallel module wasn't able to locate the output tensors in the return value of your module's forward function. Please include the loss function and the structure of the return value of forward of your module when reporting this issue (e.g. list, dict, iterable). '
											
										
										
											2019-11-25 03:21:36 -05:00
+								        model = torch.nn.parallel.DistributedDataParallel(model, find_unused_parameters=True)
-												updates

											
										
										
											2019-08-05 17:25:50 +02:00
+								        model.yolo_layers = model.module.yolo_layers  # move yolo layer indices to top level
-												updates

											
										
										
											2019-07-24 18:02:26 +02:00
-												Add collate_fn() to DataLoader (#163)

Multi-GPU update with custom collate function to allow variable size target vector per image without needing to pad targets.
											
										
										
											2019-03-25 14:59:38 +01:00
+								    # Dataset
-												updates

											
										
										
											2019-12-04 23:02:32 -08:00
+								    dataset = LoadImagesAndLabels(train_path, img_size, batch_size,
-												updates

											
										
										
											2019-05-21 17:37:34 +02:00
+								                                  augment=True,
-												updates

											
										
										
											2019-07-20 14:54:37 +02:00
+								                                  hyp=hyp,  # augmentation hyperparameters
-												updates

											
										
										
											2019-07-30 17:51:19 +02:00
+								                                  rect=opt.rect,  # rectangular training
-												updates

											
										
										
											2020-01-17 17:52:28 -08:00
+								                                  cache_images=opt.cache_images,
 								                                  single_cls=opt.single_cls)
-												Add collate_fn() to DataLoader (#163)

Multi-GPU update with custom collate function to allow variable size target vector per image without needing to pad targets.
											
										
										
											2019-03-25 14:59:38 +01:00
 								    # Dataloader
-												updates

											
										
										
											2019-11-20 19:34:22 -08:00
+								    batch_size = min(batch_size, len(dataset))
-												updates

											
										
										
											2019-12-04 23:02:32 -08:00
+								    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
-												updates

											
										
										
											2019-07-24 15:56:10 +02:00
+								    dataloader = torch.utils.data.DataLoader(dataset,
 								                                             batch_size=batch_size,
-												updates

											
										
										
											2019-12-04 15:15:23 -08:00
+								                                             num_workers=nw,
-												updates

											
										
										
											2019-07-24 15:56:10 +02:00
+								                                             shuffle=not opt.rect,  # Shuffle=True unless rectangular training is used
 								                                             pin_memory=True,
 								                                             collate_fn=dataset.collate_fn)
-												Initial commit

											
										
										
											2018-08-26 10:51:39 +02:00
-												updates

											
										
										
											2020-01-10 16:09:36 -08:00
+								    # Testloader
-												detailed image sizes report

											
										
										
											2020-04-14 11:51:19 -07:00
+								    testloader = torch.utils.data.DataLoader(LoadImagesAndLabels(test_path, imgsz_test, batch_size,
-												updates

											
										
										
											2020-01-10 16:09:36 -08:00
+								                                                                 hyp=hyp,
 								                                                                 rect=True,
-												updates

											
										
										
											2020-01-17 17:52:28 -08:00
+								                                                                 cache_images=opt.cache_images,
 								                                                                 single_cls=opt.single_cls),
-												NMS and test batch_size updates

											
										
										
											2020-03-29 20:41:32 -07:00
+								                                             batch_size=batch_size,
-												updates

											
										
										
											2020-01-10 16:09:36 -08:00
+								                                             num_workers=nw,
 								                                             pin_memory=True,
 								                                             collate_fn=dataset.collate_fn)
-												updates

											
										
										
											2019-12-04 23:02:32 -08:00
-												updates

											
										
										
											2020-03-13 20:12:54 -07:00
+								    # Model parameters
-												updates

											
										
										
											2019-08-05 16:59:32 +02:00
+								    model.nc = nc  # attach number of classes to model
-												updates

											
										
										
											2019-04-17 15:52:51 +02:00
+								    model.hyp = hyp  # attach hyperparameters to model
-												burnin merged with prebias

											
										
										
											2020-04-02 14:08:21 -07:00
+								    model.gr = 1.0  # giou loss ratio (obj_loss = 1.0 or giou)
-												updates

											
										
										
											2019-11-20 13:36:15 -08:00
+								    model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device)  # attach class weights
-												updates

											
										
										
											2020-03-13 20:12:54 -07:00
 								    # Model EMA
-												EMA implemented by default

											
										
										
											2020-03-29 13:14:54 -07:00
+								    ema = torch_utils.ModelEMA(model)
-												updates

											
										
										
											2020-03-13 20:12:54 -07:00
 								    # Start training
 								    nb = len(dataloader)  # number of batches
-												training updates

											
										
										
											2020-04-08 21:34:34 -07:00
+								    n_burn = max(3 * nb, 500)  # burn-in iterations, max(3 epochs, 500 iterations)
-												add *.jpeg support

											
										
										
											2019-05-10 14:15:09 +02:00
+								    maps = np.zeros(nc)  # mAP per class
-												updates

											
										
										
											2019-11-25 17:24:05 -10:00
+								    # torch.autograd.set_detect_anomaly(True)
-												removed xy/wh loss reporting

											
										
										
											2019-08-24 17:16:20 +02:00
+								    results = (0, 0, 0, 0, 0, 0, 0)  # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'
-												updates

											
										
										
											2019-07-16 17:56:39 +02:00
+								    t0 = time.time()
-												move image size report

											
										
										
											2020-04-15 22:03:51 -07:00
+								    print('Image sizes %g - %g train, %g test' % (imgsz_min, imgsz_max, imgsz_test))
-												updates

											
										
										
											2019-12-08 17:57:23 -08:00
+								    print('Using %g dataloader workers' % nw)
-												updates

											
										
										
											2020-01-10 16:09:36 -08:00
+								    print('Starting training for %g epochs...' % epochs)
-												updates

											
										
										
											2020-02-24 12:21:47 -08:00
+								    for epoch in range(start_epoch, epochs):  # epoch ------------------------------------------------------------------
-												multi_gpu (#135)

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

											
										
										
											2019-03-17 23:45:39 +02:00
+								        model.train()
-												Adam to SGD with burn-in

											
										
										
											2018-09-20 18:03:19 +02:00
-												updates

											
										
										
											2019-07-30 17:51:19 +02:00
+								        # Update image weights (optional)
 								        if dataset.image_weights:
-												updates

											
										
										
											2019-08-02 01:33:24 +02:00
+								            w = model.class_weights.cpu().numpy() * (1 - maps) ** 2  # class weights
-												updates

											
										
										
											2019-07-30 17:51:19 +02:00
+								            image_weights = labels_to_image_weights(dataset.labels, nc=nc, class_weights=w)
 								            dataset.indices = random.choices(range(dataset.n), weights=image_weights, k=dataset.n)  # rand weighted idx
-												add *.jpeg support

											
										
										
											2019-05-10 14:15:09 +02:00
-												removed xy/wh loss reporting

											
										
										
											2019-08-24 16:43:43 +02:00
+								        mloss = torch.zeros(4).to(device)  # mean losses
-												updates

											
										
										
											2020-01-17 17:52:28 -08:00
+								        print(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size'))
-												updates

											
										
										
											2019-06-30 17:34:29 +02:00
+								        pbar = tqdm(enumerate(dataloader), total=nb)  # progress bar
-												updates

											
										
										
											2019-08-23 00:36:48 +02:00
+								        for i, (imgs, targets, paths, _) in pbar:  # batch -------------------------------------------------------------
-												updates

											
										
										
											2019-08-23 13:39:43 +02:00
+								            ni = i + nb * epoch  # number integrated batches (since train start)
-												updates

											
										
										
											2019-12-08 17:52:44 -08:00
+								            imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0
-												Add collate_fn() to DataLoader (#163)

Multi-GPU update with custom collate function to allow variable size target vector per image without needing to pad targets.
											
										
										
											2019-03-25 14:59:38 +01:00
+								            targets = targets.to(device)
-												simplify train.py

											
										
										
											2018-09-19 04:21:46 +02:00
-												burnin merged with prebias

											
										
										
											2020-04-02 14:08:21 -07:00
+								            # Burn-in
-												training updates

											
										
										
											2020-04-08 21:01:58 -07:00
+								            if ni <= n_burn * 2:
 								                model.gr = np.interp(ni, [0, n_burn * 2], [0.0, 1.0])  # giou loss ratio (obj_loss = 1.0 or giou)
-												burnin merged with prebias

											
										
										
											2020-04-02 14:08:21 -07:00
+								                if ni == n_burn:  # burnin complete
 								                    print_model_biases(model)
 								                for j, x in enumerate(optimizer.param_groups):
 								                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
 								                    x['lr'] = np.interp(ni, [0, n_burn], [0.1 if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
-												burnin lr ramp 300 iterations

											
										
										
											2020-03-30 19:27:42 -07:00
+								                    if 'momentum' in x:
-												burnin merged with prebias

											
										
										
											2020-04-02 14:08:21 -07:00
+								                        x['momentum'] = np.interp(ni, [0, n_burn], [0.9, hyp['momentum']])
-												updates

											
										
										
											2020-02-05 20:35:54 -08:00
 								            # Multi-Scale training
 								            if opt.multi_scale:
-												updates

											
										
										
											2020-02-09 09:12:45 -08:00
+								                if ni / accumulate % 1 == 0:  #  adjust img_size (67% - 150%) every 1 batch
-												multi-scale update

											
										
										
											2020-04-12 18:22:54 -07:00
+								                    img_size = random.randrange(grid_min, grid_max + 1) * gs
-												updates

											
										
										
											2020-02-05 20:35:54 -08:00
+								                sf = img_size / max(imgs.shape[2:])  # scale factor
 								                if sf != 1:
-												parameterize grid size

											
										
										
											2020-04-08 10:14:33 -07:00
+								                    ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (stretched to 32-multiple)
-												updates

											
										
										
											2020-02-05 20:35:54 -08:00
+								                    imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)
-												multi_gpu (#135)

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

											
										
										
											2019-03-17 23:45:39 +02:00
+								            # Run model
-												Add collate_fn() to DataLoader (#163)

Multi-GPU update with custom collate function to allow variable size target vector per image without needing to pad targets.
											
										
										
											2019-03-25 14:59:38 +01:00
+								            pred = model(imgs)
-												multi_gpu (#135)

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

											
										
										
											2019-03-17 23:45:39 +02:00
-												updates

											
										
										
											2019-03-07 17:16:38 +01:00
+								            # Compute loss
-												updates

											
										
										
											2020-03-04 13:20:08 -08:00
+								            loss, loss_items = compute_loss(pred, targets, model)
-												weight_decay fix

											
										
										
											2019-08-31 17:55:19 +02:00
+								            if not torch.isfinite(loss):
-												updates

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>

											
										
										
											2019-09-02 11:59:13 +02:00
+								                print('WARNING: non-finite loss, ending training ', loss_items)
 								                return results
-												updates

											
										
										
											2019-03-07 17:16:38 +01:00
-												weight_decay fix

											
										
										
											2019-08-26 16:24:19 +02:00
+								            # Scale loss by nominal batch_size of 64
 								            loss *= batch_size / 64
-												updates

											
										
										
											2019-08-24 23:58:08 +02:00
-												updates

											
										
										
											2019-03-07 17:16:38 +01:00
+								            # Compute gradient
-												updates

											
										
										
											2019-04-13 16:02:45 +02:00
+								            if mixed_precision:
 								                with amp.scale_loss(loss, optimizer) as scaled_loss:
 								                    scaled_loss.backward()
 								            else:
 								                loss.backward()
-												clean up train.py

											
										
										
											2018-10-09 19:22:33 +02:00
-												updates

											
										
										
											2020-02-24 12:44:22 -08:00
+								            # Optimize accumulated gradient
-												updates

											
										
										
											2019-08-23 13:31:32 +02:00
+								            if ni % accumulate == 0:
-												updates

											
										
										
											2018-12-16 15:16:19 +01:00
+								                optimizer.step()
 								                optimizer.zero_grad()
-												EMA implemented by default

											
										
										
											2020-03-29 13:14:54 -07:00
+								                ema.update(model)
-												simplify train.py

											
										
										
											2018-09-19 04:21:46 +02:00
-												updates

											
										
										
											2019-04-15 13:55:52 +02:00
+								            # Print batch results
-												updates

											
										
										
											2019-05-23 12:32:11 +02:00
+								            mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
-												updates

											
										
										
											2020-02-05 20:27:01 -08:00
+								            mem = '%.3gG' % (torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0)  # (GB)
 								            s = ('%10s' * 2 + '%10.3g' * 6) % ('%g/%g' % (epoch, epochs - 1), mem, *mloss, len(targets), img_size)
-												weight_decay fix

											
										
										
											2019-08-29 14:29:07 +02:00
+								            pbar.set_description(s)
-												updates

											
										
										
											2019-08-24 21:20:25 +02:00
-												burnin lr ramp 300 iterations

											
										
										
											2020-03-30 19:27:42 -07:00
+								            # Plot images with bounding boxes
 								            if ni < 1:
 								                f = 'train_batch%g.png' % i  # filename
 								                plot_images(imgs=imgs, targets=targets, paths=paths, fname=f)
 								                if tb_writer:
 								                    tb_writer.add_image(f, cv2.imread(f)[:, :, ::-1], dataformats='HWC')
-												tensorboard updates

											
										
										
											2020-04-04 19:34:39 -07:00
+								                    # tb_writer.add_graph(model, imgs)  # add model to tensorboard
-												burnin lr ramp 300 iterations

											
										
										
											2020-03-30 19:27:42 -07:00
-												weight_decay fix

											
										
										
											2019-08-29 14:29:07 +02:00
+								            # end batch ------------------------------------------------------------------------------------------------
-												updates

											
										
										
											2020-02-24 12:44:22 -08:00
+								        # Update scheduler
 								        scheduler.step()
-												weight_decay fix

											
										
										
											2019-08-29 14:29:07 +02:00
+								        # Process epoch results
-												EMA implemented by default

											
										
										
											2020-03-29 13:14:54 -07:00
+								        ema.update_attr(model)
-												removed xy/wh loss reporting

											
										
										
											2019-08-24 21:35:56 +02:00
+								        final_epoch = epoch + 1 == epochs
-												updates

											
										
										
											2020-01-17 17:52:28 -08:00
+								        if not opt.notest or final_epoch:  # Calculate mAP
-												updates

											
										
										
											2019-12-20 09:07:25 -08:00
+								            is_coco = any([x in data for x in ['coco.data', 'coco2014.data', 'coco2017.data']]) and model.nc == 80
 								            results, maps = test.test(cfg,
 								                                      data,
-												NMS and test batch_size updates

											
										
										
											2020-03-29 20:41:32 -07:00
+								                                      batch_size=batch_size,
-												detailed image sizes report

											
										
										
											2020-04-14 11:51:19 -07:00
+								                                      img_size=imgsz_test,
-												EMA implemented by default

											
										
										
											2020-03-29 13:14:54 -07:00
+								                                      model=ema.ema,
-												updates

											
										
										
											2019-12-20 09:07:25 -08:00
+								                                      save_json=final_epoch and is_coco,
-												updates

											
										
										
											2020-01-17 17:58:37 -08:00
+								                                      single_cls=opt.single_cls,
 								                                      dataloader=testloader)
-												updates

											
										
										
											2019-04-05 15:34:42 +02:00
 								        # Write epoch results
-												updates

											
										
										
											2019-09-18 00:54:07 +02:00
+								        with open(results_file, 'a') as f:
 								            f.write(s + '%10.3g' * 7 % results + '\n')  # P, R, mAP, F1, test_losses=(GIoU, obj, cls)
-												updates

											
										
										
											2020-01-10 16:09:36 -08:00
+								        if len(opt.name) and opt.bucket:
-												updates

											
										
										
											2020-01-21 23:18:34 -08:00
+								            os.system('gsutil cp results.txt gs://%s/results/results%s.txt' % (opt.bucket, opt.name))
-												updates

											
										
										
											2019-04-05 15:34:42 +02:00
-												Tensorboard support (#435)


											
										
										
											2019-08-08 16:30:34 -04:00
+								        # Write Tensorboard results
-												tensorboard updates

											
										
										
											2019-08-09 16:37:19 +02:00
+								        if tb_writer:
-												tensorboard updates

											
										
										
											2020-04-04 19:34:39 -07:00
+								            tags = ['train/giou_loss', 'train/obj_loss', 'train/cls_loss',
 								                    'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/F1',
 								                    'val/giou_loss', 'val/obj_loss', 'val/cls_loss']
 								            for x, tag in zip(list(mloss[:-1]) + list(results), tags):
 								                tb_writer.add_scalar(tag, x, epoch)
-												Tensorboard support (#435)


											
										
										
											2019-08-08 16:30:34 -04:00
-												updates

											
										
										
											2019-08-24 21:20:25 +02:00
+								        # Update best mAP
-												updates

											
										
										
											2020-01-29 10:30:13 -08:00
+								        fi = fitness(np.array(results).reshape(1, -1))  # fitness_i = weighted combination of [P, R, mAP, F1]
 								        if fi > best_fitness:
 								            best_fitness = fi
-												multi_gpu (#135)

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

											
										
										
											2019-03-17 23:45:39 +02:00
-												multi_gpu multi_scale

											
										
										
											2019-03-19 10:38:32 +02:00
+								        # Save training results
-												updates

											
										
										
											2020-01-10 16:09:36 -08:00
+								        save = (not opt.nosave) or (final_epoch and not opt.evolve)
-												multi_gpu (#135)

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

											
										
										
											2019-03-17 23:45:39 +02:00
+								        if save:
-												updates

											
										
										
											2019-09-18 00:54:07 +02:00
+								            with open(results_file, 'r') as f:
-												updates

											
										
										
											2019-07-08 18:00:19 +02:00
+								                # Create checkpoint
 								                chkpt = {'epoch': epoch,
 								                         'best_fitness': best_fitness,
-												updates

											
										
										
											2019-09-18 00:54:07 +02:00
+								                         'training_results': f.read(),
-												EMA implemented by default

											
										
										
											2020-03-29 13:14:54 -07:00
+								                         'model': ema.ema.module.state_dict() if hasattr(model, 'module') else ema.ema.state_dict(),
-												updates

											
										
										
											2019-08-23 12:57:26 +02:00
+								                         'optimizer': None if final_epoch else optimizer.state_dict()}
-												updates

											
										
										
											2019-04-05 15:34:42 +02:00
-												updates

											
										
										
											2019-07-15 17:54:31 +02:00
+								            # Save last checkpoint
 								            torch.save(chkpt, last)
-												multi_gpu (#135)

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

											
										
										
											2019-03-17 23:45:39 +02:00
 								            # Save best checkpoint
-												burnin merged with prebias

											
										
										
											2020-04-02 14:08:21 -07:00
+								            if (best_fitness == fi) and not final_epoch:
-												updates

											
										
										
											2019-04-02 18:04:04 +02:00
+								                torch.save(chkpt, best)
-												multi_gpu (#135)

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

											
										
										
											2019-03-17 23:45:39 +02:00
-												updates

											
										
										
											2019-04-02 18:04:04 +02:00
+								            # Save backup every 10 epochs (optional)
-												updates

											
										
										
											2020-01-21 16:18:24 -08:00
+								            # if epoch > 0 and epoch % 10 == 0:
 								            #     torch.save(chkpt, wdir + 'backup%g.pt' % epoch)
-												updates

											
										
										
											2019-04-02 16:33:52 +02:00
-												updates

											
										
										
											2019-04-05 15:34:42 +02:00
+								            # Delete checkpoint
-												weight_decay fix

											
										
										
											2019-08-29 14:29:07 +02:00
+								            del chkpt
 								        # end epoch ----------------------------------------------------------------------------------------------------
-												Initial commit

											
										
										
											2018-08-26 10:51:39 +02:00
-												updates

											
										
										
											2019-09-09 22:42:38 +02:00
+								    # end training
-												updates

											
										
										
											2020-01-05 12:50:58 -08:00
+								    n = opt.name
-												updates

											
										
										
											2020-01-10 16:09:36 -08:00
+								    if len(n):
-												updates

											
										
										
											2020-01-05 12:50:58 -08:00
+								        n = '_' + n if not n.isnumeric() else n
-												auto strip optimizer from best.pt after training

											
										
										
											2020-04-09 19:53:29 -07:00
+								        fresults, flast, fbest = 'results%s.txt' % n, wdir + 'last%s.pt' % n, wdir + 'best%s.pt' % n
 								        for f1, f2 in zip([wdir + 'last.pt', wdir + 'best.pt', 'results.txt'], [flast, fbest, fresults]):
 								            if os.path.exists(f1):
 								                os.rename(f1, f2)  # rename
 								                ispt = f2.endswith('.pt')  # is *.pt
 								                strip_optimizer(f2) if ispt else None  # strip optimizer
 								                os.system('gsutil cp %s gs://%s/weights' % (f2, opt.bucket)) if opt.bucket and ispt else None  # upload
-												updates

											
										
										
											2019-11-17 18:48:50 -08:00
-												updates

											
										
										
											2020-01-12 16:18:29 -08:00
+								    if not opt.evolve:
 								        plot_results()  # save as results.png
-												updates

											
										
										
											2019-08-24 21:39:25 +02:00
+								    print('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
-												updates

											
										
										
											2019-07-24 19:31:38 +02:00
+								    dist.destroy_process_group() if torch.cuda.device_count() > 1 else None
-												updates

											
										
										
											2019-07-24 00:22:07 +02:00
+								    torch.cuda.empty_cache()
-												updates

											
										
										
											2019-10-16 01:40:40 +02:00
-												updates

											
										
										
											2019-04-17 16:15:08 +02:00
+								    return results
-												Initial commit

											
										
										
											2018-08-26 10:51:39 +02:00
 								if __name__ == '__main__':
 								    # Script entry point: parse the command-line options, select the compute device, then either
 								    # run a single normal training session or one generation of hyperparameter evolution.
 								    parser = argparse.ArgumentParser()
 								    parser.add_argument('--epochs', type=int, default=300)  # 500200 batches at bs 16, 117263 COCO images = 273 epochs
 								    parser.add_argument('--batch-size', type=int, default=16)  # effective bs = batch_size * accumulate = 16 * 4 = 64
 								    parser.add_argument('--accumulate', type=int, default=4, help='batches to accumulate before optimizing')
 								    parser.add_argument('--cfg', type=str, default='cfg/yolov3-spp.cfg', help='*.cfg path')
 								    parser.add_argument('--data', type=str, default='data/coco2017.data', help='*.data path')
 								    parser.add_argument('--multi-scale', action='store_true', help='adjust (67%% - 150%%) img_size every 10 batches')
 								    parser.add_argument('--img-size', nargs='+', type=int, default=[512], help='[min_train, max-train, test] img sizes')
 								    parser.add_argument('--rect', action='store_true', help='rectangular training')
 								    parser.add_argument('--resume', action='store_true', help='resume training from last.pt')
 								    parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
 								    parser.add_argument('--notest', action='store_true', help='only test final epoch')
 								    parser.add_argument('--evolve', action='store_true', help='evolve hyperparameters')
 								    parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
 								    parser.add_argument('--cache-images', action='store_true', help='cache images for faster training')
 								    parser.add_argument('--weights', type=str, default='weights/yolov3-spp-ultralytics.pt', help='initial weights path')
 								    parser.add_argument('--name', default='', help='renames results.txt to results_name.txt if supplied')
 								    parser.add_argument('--device', default='', help='device id (i.e. 0 or 0,1 or cpu)')
 								    parser.add_argument('--adam', action='store_true', help='use adam optimizer')
 								    parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset')
 								    opt = parser.parse_args()
 								    opt.weights = last if opt.resume else opt.weights  # --resume replaces --weights with the last checkpoint
 								    print(opt)
 								    opt.img_size.extend([opt.img_size[-1]] * (3 - len(opt.img_size)))  # extend to 3 sizes (min, max, test)
 								    device = torch_utils.select_device(opt.device, apex=mixed_precision, batch_size=opt.batch_size)
 								    if device.type == 'cpu':
 								        mixed_precision = False  # module-level flag: disable mixed precision when running on CPU

 								    # scale hyp['obj'] by img_size (evolved at 320)
 								    # hyp['obj'] *= opt.img_size[0] / 320.

 								    tb_writer = None  # stays None while evolving, so Tensorboard logging is skipped in that mode
 								    if not opt.evolve:  # Train normally
 								        print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/')
 								        tb_writer = SummaryWriter()
 								        train()  # train normally

 								    else:  # Evolve hyperparameters (optional)
 								        opt.notest, opt.nosave = True, True  # only test/save final epoch
 								        if opt.bucket:
 								            os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket)  # download evolve.txt if exists

 								        for _ in range(1):  # generations to evolve
 								            if os.path.exists('evolve.txt'):  # if evolve.txt exists: select best hyps and mutate
 								                # Select parent(s)
 								                parent = 'single'  # parent selection method: 'single' or 'weighted'
 								                x = np.loadtxt('evolve.txt', ndmin=2)
 								                n = min(5, len(x))  # number of previous results to consider
 								                x = x[np.argsort(-fitness(x))][:n]  # top n mutations
 								                # Offset the weights by a small epsilon so they can never sum to zero. Without it a
 								                # single-row evolve.txt (or tied candidates) yields all-zero weights, which makes
 								                # random.choices() raise ValueError on Python 3.9+ and makes the 'weighted'
 								                # combination below divide by zero.
 								                fit = fitness(x)  # evaluate once and reuse
 								                w = fit - fit.min() + 1e-6  # weights (sum > 0)
 								                if parent == 'single' or len(x) == 1:
 								                    # x = x[random.randint(0, n - 1)]  # random selection
 								                    x = x[random.choices(range(n), weights=w)[0]]  # weighted selection
 								                elif parent == 'weighted':
 								                    x = (x * w.reshape(n, 1)).sum(0) / w.sum()  # weighted combination

 								                # Mutate
 								                method, mp, s = 3, 0.9, 0.2  # method, mutation probability, sigma
 								                npr = np.random
 								                npr.seed(int(time.time()))
 								                g = np.array([1, 1, 1, 1, 1, 1, 1, 0, .1, 1, 0, 1, 1, 1, 1, 1, 1, 1])  # gains
 								                ng = len(g)
 								                if method == 1:
 								                    v = (npr.randn(ng) * npr.random() * g * s + 1) ** 2.0
 								                elif method == 2:
 								                    v = (npr.randn(ng) * npr.random(ng) * g * s + 1) ** 2.0
 								                elif method == 3:
 								                    v = np.ones(ng)
 								                    while all(v == 1):  # mutate until a change occurs (prevent duplicates)
 								                        # v = (g * (npr.random(ng) < mp) * npr.randn(ng) * s + 1) ** 2.0
 								                        v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0)
 								                # The hyperparameter values are read starting at column 7 of the selected row
 								                # (presumably after the 7 result values written per run -- verify against print_mutation).
 								                for i, k in enumerate(hyp.keys()):  # plt.hist(v.ravel(), 300)
 								                    hyp[k] = x[i + 7] * v[i]  # mutate

 								            # Clip to limits
 								            keys = ['lr0', 'iou_t', 'momentum', 'weight_decay', 'hsv_s', 'hsv_v', 'translate', 'scale', 'fl_gamma']
 								            limits = [(1e-5, 1e-2), (0.00, 0.70), (0.60, 0.98), (0, 0.001), (0, .9), (0, .9), (0, .9), (0, .9), (0, 3)]
 								            for k, v in zip(keys, limits):
 								                hyp[k] = np.clip(hyp[k], v[0], v[1])

 								            # Train mutation
 								            results = train()

 								            # Write mutation results
 								            print_mutation(hyp, results, opt.bucket)

 								            # Plot results
 								            # plot_evolution_results(hyp)