From dec2c7d9a6bc2c856f93307404be6f01e6c8cf9a Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Fri, 17 Jan 2020 17:52:28 -0800 Subject: [PATCH] updates --- test.py | 9 ++++++--- train.py | 44 ++++++++++++++++++++++---------------------- utils/datasets.py | 5 +++-- utils/evolve.sh | 4 ++-- utils/gcp.sh | 34 +++++++++++++++++++++++++++++++--- 5 files changed, 64 insertions(+), 32 deletions(-) diff --git a/test.py b/test.py index bd859580..68b636b1 100644 --- a/test.py +++ b/test.py @@ -17,7 +17,8 @@ def test(cfg, iou_thres=0.5, # for nms save_json=False, model=None, - dataloader=None): + dataloader=None, + single_cls=False): # Initialize/load model and set device if model is None: device = torch_utils.select_device(opt.device, batch_size=batch_size) @@ -45,7 +46,7 @@ def test(cfg, # Configure run data = parse_data_cfg(data) - nc = int(data['classes']) # number of classes + nc = 1 if single_cls else int(data['classes']) # number of classes path = data['valid'] # path to test images names = load_classes(data['names']) # class names iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95 @@ -216,6 +217,7 @@ if __name__ == '__main__': parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file') parser.add_argument('--task', default='test', help="'test', 'study', 'benchmark'") parser.add_argument('--device', default='', help='device id (i.e. 
0 or 0,1) or cpu') + parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset') opt = parser.parse_args() opt.save_json = opt.save_json or any([x in opt.data for x in ['coco.data', 'coco2014.data', 'coco2017.data']]) print(opt) @@ -229,7 +231,8 @@ if __name__ == '__main__': opt.img_size, opt.conf_thres, opt.iou_thres, - opt.save_json) + opt.save_json, + single_cls=opt.single_cls) elif opt.task == 'benchmark': # mAPs at 320-608 at conf 0.5 and 0.7 diff --git a/train.py b/train.py index 58006c45..158cbbef 100644 --- a/train.py +++ b/train.py @@ -74,7 +74,7 @@ def train(): data_dict = parse_data_cfg(data) train_path = data_dict['train'] test_path = data_dict['valid'] - nc = int(data_dict['classes']) # number of classes + nc = 1 if opt.single_cls else int(data_dict['classes']) # number of classes # Remove previous results for f in glob.glob('*_batch*.jpg') + glob.glob(results_file): @@ -177,7 +177,8 @@ def train(): hyp=hyp, # augmentation hyperparameters rect=opt.rect, # rectangular training cache_labels=True, - cache_images=opt.cache_images) + cache_images=opt.cache_images, + single_cls=opt.single_cls) # Dataloader batch_size = min(batch_size, len(dataset)) @@ -194,7 +195,8 @@ def train(): hyp=hyp, rect=True, cache_labels=True, - cache_images=opt.cache_images), + cache_images=opt.cache_images, + single_cls=opt.single_cls), batch_size=batch_size * 2, num_workers=nw, pin_memory=True, @@ -202,6 +204,7 @@ def train(): # Start training nb = len(dataloader) + prebias = start_epoch == 0 model.nc = nc # attach number of classes to model model.arc = opt.arc # attach yolo architecture model.hyp = hyp # attach hyperparameters to model @@ -213,24 +216,22 @@ def train(): torch_utils.model_info(model, report='summary') # 'full' or 'summary' print('Using %g dataloader workers' % nw) print('Starting training for %g epochs...' 
% epochs) - for epoch in range(start_epoch - 1 if opt.prebias else start_epoch, epochs): # epoch ------------------------------ + for epoch in range(start_epoch, epochs): # epoch ------------------------------ model.train() - print(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size')) # Prebias - if opt.prebias: - if epoch < 0: # prebias - ps = 0.1, 0.9, False # prebias settings (lr=0.1, momentum=0.9, requires_grad=False) + if prebias: + if epoch < 20: # prebias + ps = 0.1, 0.9 # prebias settings (lr=0.1, momentum=0.9) else: # normal training - ps = hyp['lr0'], hyp['momentum'], True # normal training settings - opt.prebias = False + ps = hyp['lr0'], hyp['momentum'] # normal training settings + print_model_biases(model) + prebias = False - for p in optimizer.param_groups: - p['lr'] = ps[0] # learning rate - if p.get('momentum') is not None: # for SGD but not Adam - p['momentum'] = ps[1] - for name, p in model.named_parameters(): - p.requires_grad = True if name.endswith('.bias') else ps[2] + # Bias optimizer settings + optimizer.param_groups[2]['lr'] = ps[0] + if optimizer.param_groups[2].get('momentum') is not None: # for SGD but not Adam + optimizer.param_groups[2]['momentum'] = ps[1] # Update image weights (optional) if dataset.image_weights: @@ -239,6 +240,7 @@ def train(): dataset.indices = random.choices(range(dataset.n), weights=image_weights, k=dataset.n) # rand weighted idx mloss = torch.zeros(4).to(device) # mean losses + print(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size')) pbar = tqdm(enumerate(dataloader), total=nb) # progress bar for i, (imgs, targets, paths, _) in pbar: # batch ------------------------------------------------------------- ni = i + nb * epoch # number integrated batches (since train start) @@ -307,10 +309,7 @@ def train(): # Process epoch results final_epoch = epoch + 1 == epochs - if opt.prebias: - print_model_biases(model) - continue - elif 
not opt.notest or final_epoch: # Calculate mAP + if not opt.notest or final_epoch: # Calculate mAP is_coco = any([x in data for x in ['coco.data', 'coco2014.data', 'coco2017.data']]) and model.nc == 80 results, maps = test.test(cfg, data, @@ -320,7 +319,8 @@ def train(): conf_thres=0.001 if final_epoch else 0.1, # 0.1 for speed iou_thres=0.6 if final_epoch and is_coco else 0.5, save_json=final_epoch and is_coco, - dataloader=testloader) + dataloader=testloader, + single_cls=opt.single_cls) # Update scheduler scheduler.step() @@ -412,10 +412,10 @@ if __name__ == '__main__': parser.add_argument('--cache-images', action='store_true', help='cache images for faster training') parser.add_argument('--weights', type=str, default='weights/ultralytics68.pt', help='initial weights') parser.add_argument('--arc', type=str, default='default', help='yolo architecture') # defaultpw, uCE, uBCE - parser.add_argument('--prebias', action='store_true', help='pretrain model biases') parser.add_argument('--name', default='', help='renames results.txt to results_name.txt if supplied') parser.add_argument('--device', default='', help='device id (i.e. 
0 or 0,1 or cpu)') parser.add_argument('--adam', action='store_true', help='use adam optimizer') + parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset') parser.add_argument('--var', type=float, help='debug variable') opt = parser.parse_args() opt.weights = last if opt.resume else opt.weights diff --git a/utils/datasets.py b/utils/datasets.py index f9f5459c..3f8a8304 100755 --- a/utils/datasets.py +++ b/utils/datasets.py @@ -263,7 +263,7 @@ class LoadStreams: # multiple IP or RTSP cameras class LoadImagesAndLabels(Dataset): # for training/testing def __init__(self, path, img_size=416, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, - cache_labels=False, cache_images=False): + cache_labels=False, cache_images=False, single_cls=False): path = str(Path(path)) # os-agnostic assert os.path.isfile(path), 'File not found %s. See %s' % (path, help_url) with open(path, 'r') as f: @@ -343,7 +343,8 @@ class LoadImagesAndLabels(Dataset): # for training/testing assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file if np.unique(l, axis=0).shape[0] < l.shape[0]: # duplicate rows nd += 1 # print('WARNING: duplicate rows in %s' % self.label_files[i]) # duplicate rows - + if single_cls: + l[:, 0] = 0 # force dataset into single-class mode self.labels[i] = l nf += 1 # file found diff --git a/utils/evolve.sh b/utils/evolve.sh index bdc0a2c0..f1a96a5e 100644 --- a/utils/evolve.sh +++ b/utils/evolve.sh @@ -8,8 +8,8 @@ #t=ultralytics/yolov3:v199 && sudo docker pull $t && sudo nvidia-docker run -it --ipc=host -v "$(pwd)"/coco:/usr/src/coco $t python3 train.py --data coco2014.data --img-size 672 --epochs 10 --batch 16 --accum 4 --weights '' --arc defaultpw --device 0 --multi while true; do - python3 train.py --data ../data/sm4/out.data --img-size 320 --epochs 100 --batch 64 --accum 1 --weights yolov3-tiny.pt --arc defaultpw --pre --multi --bucket ult/wer --evolve --device $1 --cfg 
yolov3-tiny-3cls.cfg --cache - # python3 train.py --data ../out/data.data --img-size 608 --epochs 10 --batch 8 --accum 8 --weights ultralytics68.pt --arc defaultpw --pre --multi --bucket ult/athena --evolve --device $1 --cfg yolov3-spp-1cls.cfg + # python3 train.py --data ../data/sm4/out.data --img-size 320 --epochs 100 --batch 64 --accum 1 --weights yolov3-tiny.pt --arc default --pre --multi --bucket ult/wer --evolve --device $1 --cfg yolov3-tiny-3cls.cfg --cache + python3 train.py --data ../out/data.data --img-size 608 --epochs 10 --batch 8 --accum 8 --weights ultralytics68.pt --arc default --pre --multi --bucket ult/athena --evolve --device $1 --cfg yolov3-spp-1cls.cfg # python3 train.py --data coco2014.data --img-size 640 --epochs 10 --batch 22 --accum 3 --evolve --weights '' --arc defaultpw --pre --bucket yolov4/640ms_coco2014_10e --device $1 --multi # python3 train.py --data coco2014.data --img-size 320 --epochs 27 --batch 64 --accum 1 --evolve --weights '' --arc defaultpw --pre --bucket yolov4/320_coco2014_27e --device $1 diff --git a/utils/gcp.sh b/utils/gcp.sh index c1c731e0..407c2e7a 100755 --- a/utils/gcp.sh +++ b/utils/gcp.sh @@ -38,11 +38,17 @@ python3 detect.py # Test python3 test.py --save-json +# Kill All +t=ultralytics/yolov3:v206 +docker kill $(docker ps -a -q --filter ancestor=$t) +t=ultralytics/yolov3:v208 +docker kill $(docker ps -a -q --filter ancestor=$t) + # Evolve sudo -s t=ultralytics/yolov3:v206 docker kill $(docker ps -a -q --filter ancestor=$t) -for i in 6 7 +for i in 4 5 6 7 do docker pull $t && docker run --gpus all -d --ipc=host -v "$(pwd)"/data:/usr/src/data $t bash utils/evolve.sh $i # docker pull $t && docker run --gpus all -d --ipc=host -v "$(pwd)"/out:/usr/src/out $t bash utils/evolve.sh $i @@ -55,10 +61,10 @@ done sudo -s t=ultralytics/yolov3:v208 docker kill $(docker ps -a -q --filter ancestor=$t) -for i in 0 +for i in 0 1 do # docker pull $t && docker run --gpus all -d --ipc=host -v "$(pwd)"/data:/usr/src/data $t bash 
utils/evolve.sh $i - docker pull $t && docker run --gpus all -it --ipc=host -v "$(pwd)"/out:/usr/src/out $t bash utils/evolve.sh $i + docker pull $t && docker run --gpus all -d --ipc=host -v "$(pwd)"/out:/usr/src/out $t bash utils/evolve.sh $i # docker pull $t && nvidia-docker run -d -v "$(pwd)"/coco:/usr/src/coco $t bash utils/evolve.sh $i # docker pull $t && nvidia-docker run -d -v /mnt/disks/nvme0n1/coco:/usr/src/coco $t bash utils/evolve.sh $i sleep 180 @@ -277,6 +283,19 @@ n=211 && t=ultralytics/yolov3:v$n && sudo docker pull $t && sudo docker run --gp n=212 && t=ultralytics/yolov3:v$n && sudo docker pull $t && sudo docker run --gpus all -v "$(pwd)"/out:/usr/src/out $t python3 train.py --data ../out/data.data --img-size 608 --epochs 100 --batch 8 --accum 8 --weights ultralytics68.pt --arc defaultpw --pre --multi --device 0 --bucket ult/athena --name $n --cfg yolov3-spp-1cls.cfg n=213 && t=ultralytics/yolov3:v$n && sudo docker pull $t && sudo docker run --gpus all -v "$(pwd)"/out:/usr/src/out $t python3 train.py --data ../out/data.data --img-size 608 --epochs 100 --batch 8 --accum 8 --weights ultralytics68.pt --arc defaultpw --pre --multi --device 0 --bucket ult/athena --name $n --cfg yolov3-spp-1cls.cfg n=214 && t=ultralytics/yolov3:v$n && sudo docker pull $t && sudo docker run --gpus all -v "$(pwd)"/out:/usr/src/out $t python3 train.py --data ../out/data.data --img-size 608 --epochs 100 --batch 8 --accum 8 --weights ultralytics68.pt --arc defaultpw --pre --multi --device 0 --bucket ult/athena --name $n --cfg yolov3-spp-1cls.cfg +n=215 && t=ultralytics/yolov3:v$n && sudo docker pull $t && sudo docker run --gpus all -v "$(pwd)"/out:/usr/src/out $t python3 train.py --data ../out/data.data --img-size 608 --epochs 100 --batch 8 --accum 8 --weights ultralytics68.pt --arc defaultpw --pre --multi --device 0 --bucket ult/athena --name $n --cfg yolov3-spp-1cls.cfg +n=217 && t=ultralytics/yolov3:v$n && sudo docker pull $t && sudo docker run --gpus all --ipc=host -it -v 
"$(pwd)"/out:/usr/src/out $t python3 train.py --data ../out/data.data --img-size 608 --epochs 100 --batch 8 --accum 8 --weights ultralytics68.pt --arc default --pre --multi --device 6 --bucket ult/athena --name $n --nosave --cfg yolov3-spp-1cls.cfg +n=219 && t=ultralytics/yolov3:v215 && sudo docker pull $t && sudo docker run -d --gpus all --ipc=host -v "$(pwd)"/out:/usr/src/out $t python3 train.py --data ../out/data.data --img-size 608 --epochs 10 --batch 8 --accum 8 --weights ultralytics68.pt --arc default --pre --multi --device 0 --bucket ult/athena --name $n --nosave --cfg yolov3-spp-1cls.cfg +n=220 && t=ultralytics/yolov3:v215 && sudo docker pull $t && sudo docker run -d --gpus all --ipc=host -v "$(pwd)"/out:/usr/src/out $t python3 train.py --data ../out/data.data --img-size 608 --epochs 20 --batch 8 --accum 8 --weights ultralytics68.pt --arc default --pre --multi --device 1 --bucket ult/athena --name $n --nosave --cfg yolov3-spp-1cls.cfg +n=221 && t=ultralytics/yolov3:v215 && sudo docker pull $t && sudo docker run -d --gpus all --ipc=host -v "$(pwd)"/out:/usr/src/out $t python3 train.py --data ../out/data.data --img-size 608 --epochs 30 --batch 8 --accum 8 --weights ultralytics68.pt --arc default --pre --multi --device 2 --bucket ult/athena --name $n --nosave --cfg yolov3-spp-1cls.cfg +n=222 && t=ultralytics/yolov3:v215 && sudo docker pull $t && sudo docker run -d --gpus all --ipc=host -v "$(pwd)"/out:/usr/src/out $t python3 train.py --data ../out/data.data --img-size 608 --epochs 40 --batch 8 --accum 8 --weights ultralytics68.pt --arc default --pre --multi --device 3 --bucket ult/athena --name $n --nosave --cfg yolov3-spp-1cls.cfg +n=223 && t=ultralytics/yolov3:v215 && sudo docker pull $t && sudo docker run -d --gpus all --ipc=host -v "$(pwd)"/out:/usr/src/out $t python3 train.py --data ../out/data.data --img-size 608 --epochs 10 --batch 8 --accum 8 --weights ultralytics68.pt --arc defaultpw --pre --multi --device 0 --bucket ult/athena --name $n --nosave 
--cfg yolov3-spp-1cls.cfg +n=224 && t=ultralytics/yolov3:v215 && sudo docker pull $t && sudo docker run -d --gpus all --ipc=host -v "$(pwd)"/out:/usr/src/out $t python3 train.py --data ../out/data.data --img-size 608 --epochs 20 --batch 8 --accum 8 --weights ultralytics68.pt --arc defaultpw --pre --multi --device 1 --bucket ult/athena --name $n --nosave --cfg yolov3-spp-1cls.cfg +n=225 && t=ultralytics/yolov3:v215 && sudo docker pull $t && sudo docker run -d --gpus all --ipc=host -v "$(pwd)"/out:/usr/src/out $t python3 train.py --data ../out/data.data --img-size 608 --epochs 30 --batch 8 --accum 8 --weights ultralytics68.pt --arc defaultpw --pre --multi --device 0 --bucket ult/athena --name $n --nosave --cfg yolov3-spp-1cls.cfg +n=226 && t=ultralytics/yolov3:v215 && sudo docker pull $t && sudo docker run -d --gpus all --ipc=host -v "$(pwd)"/out:/usr/src/out $t python3 train.py --data ../out/data.data --img-size 608 --epochs 40 --batch 8 --accum 8 --weights ultralytics68.pt --arc defaultpw --pre --multi --device 0 --bucket ult/athena --name $n --nosave --cfg yolov3-spp-1cls.cfg +n=227 && t=ultralytics/yolov3:v$n && sudo docker pull $t && sudo docker run -it --gpus all --ipc=host -v "$(pwd)"/out:/usr/src/out $t python3 train.py --data ../out/data.data --img-size 608 --epochs 10 --batch 8 --accum 8 --weights ultralytics68.pt --arc defaultpw --multi --device 0 --bucket ult/athena --name $n --nosave --cfg yolov3-spp-1cls.cfg +n=228 && t=ultralytics/yolov3:v$n && sudo docker pull $t && sudo docker run -it --gpus all --ipc=host -v "$(pwd)"/out:/usr/src/out $t python3 train.py --data ../out/data.data --img-size 608 --epochs 20 --batch 8 --accum 8 --weights ultralytics68.pt --arc defaultpw --multi --device 0 --bucket ult/athena --name $n --nosave --cfg yolov3-spp-1cls.cfg +n=229 && t=ultralytics/yolov3:v$n && sudo docker pull $t && sudo docker run -it --gpus all --ipc=host -v "$(pwd)"/out:/usr/src/out $t python3 train.py --data ../out/data.data --img-size 608 --epochs 20 
--batch 8 --accum 8 --weights ultralytics68.pt --arc defaultpw --multi --device 0 --bucket ult/athena --name $n --nosave --cfg yolov3-spp-1cls.cfg # sm4 n=201 && t=ultralytics/yolov3:v201 && sudo docker pull $t && sudo nvidia-docker run -d -v "$(pwd)"/data:/usr/src/data $t python3 train.py --data ../data/sm4/out.data --img-size 320 --epochs 1000 --batch 64 --accum 1 --weights yolov3-tiny.pt --arc defaultpw --pre --multi --bucket ult/wer --name $n --device 0 --cfg yolov3-tiny-3cls.cfg @@ -287,3 +306,12 @@ n=205 && t=ultralytics/yolov3:v202 && sudo docker pull $t && sudo nvidia-docker n=206 && t=ultralytics/yolov3:v$n && sudo docker pull $t && sudo docker run --gpus all -it -v "$(pwd)"/data:/usr/src/data $t python3 train.py --data ../data/sm4/out.data --img-size 320 --epochs 100 --batch 64 --accum 1 --weights yolov3-tiny.pt --arc defaultpw --pre --multi --notest --nosave --cache --device 0 --cfg yolov3-tiny-3cls.cfg n=209 && t=ultralytics/yolov3:v$n && sudo docker pull $t && sudo docker run --gpus all -it -v "$(pwd)"/data:/usr/src/data $t python3 train.py --data ../data/sm4/out.data --img-size 320 --epochs 1000 --batch 64 --accum 1 --weights yolov3-tiny.pt --arc defaultpw --pre --multi --bucket ult/wer --name $n --nosave --cache --device 3 --cfg yolov3-tiny-3cls.cfg n=210 && t=ultralytics/yolov3:v$n && sudo docker pull $t && sudo docker run --gpus all -it -v "$(pwd)"/data:/usr/src/data $t python3 train.py --data ../data/sm4/out.data --img-size 320 --epochs 1000 --batch 64 --accum 1 --weights yolov3-tiny.pt --arc defaultpw --pre --multi --bucket ult/wer --name $n --nosave --cache --device 1 --cfg yolov3-tiny-3cls.cfg +n=216 && t=ultralytics/yolov3:v$n && sudo docker pull $t && sudo docker run --gpus all -it -v "$(pwd)"/data:/usr/src/data $t python3 train.py --data ../data/sm4/out.data --img-size 320 --epochs 1000 --batch 64 --accum 1 --weights yolov3-tiny.pt --arc defaultpw --pre --multi --bucket ult/wer --name $n --nosave --cache --device 0 --cfg yolov3-tiny-3cls.cfg 
+n=218 && t=ultralytics/yolov3:v$n && sudo docker pull $t && sudo docker run --gpus all --ipc=host -it -v "$(pwd)"/data:/usr/src/data $t python3 train.py --data ../data/sm4/out.data --img-size 320 --epochs 1000 --batch 64 --accum 1 --weights yolov3-tiny.pt --arc default --pre --multi --bucket ult/wer --name $n --nosave --cache --device 7 --cfg yolov3-tiny-3cls.cfg +n=230 && t=ultralytics/yolov3:v$n && sudo docker pull $t && sudo docker run --gpus all --ipc=host -it -v "$(pwd)"/data:/usr/src/data $t python3 train.py --data ../data/sm4/out.data --img-size 320 --epochs 100 --batch 64 --accum 1 --weights yolov3-tiny.pt --arc default --multi --bucket ult/wer --name $n --nosave --cache --device 0 --cfg yolov3-tiny-1cls.cfg --single +n=231 && t=ultralytics/yolov3:v$n && sudo docker pull $t && sudo docker run --gpus all --ipc=host -it -v "$(pwd)"/data:/usr/src/data $t python3 train.py --data ../data/sm4/out.data --img-size 320 --epochs 100 --batch 64 --accum 1 --weights yolov3-tiny.pt --arc default --multi --bucket ult/wer --name $n --nosave --cache --device 1 --cfg yolov3-tiny-1cls.cfg --single + + +n=206 && t=ultralytics/yolov3:v$n && sudo docker pull $t && sudo docker run --gpus all -it --ipc=host -v "$(pwd)"/data:/usr/src/data $t python3 train.py --data ../data/sm4/out.data --img-size 320 --epochs 10 --batch 64 --accum 1 --weights yolov3-tiny.pt --arc defaultpw --pre --multi --nosave --cache --device 0 --cfg yolov3-tiny-3cls.cfg +n=206 && t=ultralytics/yolov3:v$n && sudo docker pull $t && sudo docker run --gpus all -it --ipc=host -v "$(pwd)"/data:/usr/src/data $t python3 train.py --data ../data/sm4/out.data --img-size 320 --epochs 10 --batch 64 --accum 1 --weights yolov3-tiny.pt --arc defaultpw --pre --multi --nosave --cache --device 1 --cfg yolov3-tiny-3cls.cfg +