diff --git a/detect.py b/detect.py
index 39ce30f4..c33a5633 100755
--- a/detect.py
+++ b/detect.py
@@ -9,53 +9,48 @@ from utils import torch_utils
 
 
 def detect(
-        net_config_path,
-        data_config_path,
-        weights_path,
+        cfg,
+        weights,
         images_path,
         output='output',
-        batch_size=16,
         img_size=416,
         conf_thres=0.3,
         nms_thres=0.45,
         save_txt=False,
-        save_images=False,
+        save_images=True,
 ):
     device = torch_utils.select_device()
-    print("Using device: \"{}\"".format(device))
 
     os.system('rm -rf ' + output)
     os.makedirs(output, exist_ok=True)
-    data_config = parse_data_config(data_config_path)
 
     # Load model
-    model = Darknet(net_config_path, img_size)
+    model = Darknet(cfg, img_size)
 
-    if weights_path.endswith('.pt'):  # pytorch format
-        if weights_path.endswith('weights/yolov3.pt') and not os.path.isfile(weights_path):
-            os.system('wget https://storage.googleapis.com/ultralytics/yolov3.pt -O ' + weights_path)
-        checkpoint = torch.load(weights_path, map_location='cpu')
+    if weights.endswith('.pt'):  # pytorch format
+        if weights.endswith('weights/yolov3.pt') and not os.path.isfile(weights):
+            os.system('wget https://storage.googleapis.com/ultralytics/yolov3.pt -O ' + weights)
+        checkpoint = torch.load(weights, map_location='cpu')
         model.load_state_dict(checkpoint['model'])
         del checkpoint
     else:  # darknet format
-        load_darknet_weights(model, weights_path)
+        load_darknet_weights(model, weights)
 
     model.to(device).eval()
 
     # Set Dataloader
-    classes = load_classes(data_config['names'])  # Extracts class labels from file
-    dataloader = load_images(images_path, batch_size=batch_size, img_size=img_size)
+    dataloader = load_images(images_path, img_size=img_size)
 
-    imgs = []  # Stores image paths
-    img_detections = []  # Stores detections for each image index
-    prev_time = time.time()
-    for i, (img_paths, img) in enumerate(dataloader):
-        print('%g/%g' % (i + 1, len(dataloader)), end=' ')
+    # Classes and colors
+    classes = load_classes(parse_data_cfg('cfg/coco.data')['names'])  # Extracts class labels from file
+    color_list = [[random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)] for _ in range(len(classes))]
+
+    for i, (path, img, img0) in enumerate(dataloader):
+        print('image %g/%g: %s' % (i + 1, len(dataloader), path))
+        t = time.time()
 
         # Get detections
         with torch.no_grad():
-            # cv2.imwrite('zidane_416.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1])  # letterboxed
             img = torch.from_numpy(img).unsqueeze(0).to(device)
             if ONNX_EXPORT:
                 pred = torch.onnx._export(model, img, 'weights/model.onnx', verbose=True)
@@ -64,71 +59,58 @@ def detect(
             pred = pred[pred[:, :, 4] > conf_thres]
 
             if len(pred) > 0:
-                detections = non_max_suppression(pred.unsqueeze(0), conf_thres, nms_thres)
-                img_detections.extend(detections)
-                imgs.extend(img_paths)
+                detections = non_max_suppression(pred.unsqueeze(0), conf_thres, nms_thres)[0]
 
-        print('Batch %d... Done. (%.3fs)' % (i, time.time() - prev_time))
-        prev_time = time.time()
+        # Draw bounding boxes and labels of detections
+        if detections is not None:
+            img = img0
 
-    # Bounding-box colors
-    color_list = [[random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)] for _ in range(len(classes))]
+            # The amount of padding that was added
+            pad_x = max(img.shape[0] - img.shape[1], 0) * (img_size / max(img.shape))
+            pad_y = max(img.shape[1] - img.shape[0], 0) * (img_size / max(img.shape))
+            # Image height and width after padding is removed
+            unpad_h = img_size - pad_y
+            unpad_w = img_size - pad_x
 
-    if len(img_detections) == 0:
-        return
+            unique_classes = detections[:, -1].cpu().unique()
+            bbox_colors = random.sample(color_list, len(unique_classes))
 
-    # Iterate through images and save plot of detections
-    for img_i, (path, detections) in enumerate(zip(imgs, img_detections)):
-        print("image %g: '%s'" % (img_i, path))
+            # write results to .txt file
+            results_img_path = os.path.join(output, path.split('/')[-1])
+            results_txt_path = results_img_path + '.txt'
+            if os.path.isfile(results_txt_path):
+                os.remove(results_txt_path)
 
-        # Draw bounding boxes and labels of detections
-        if detections is not None:
-            img = cv2.imread(path)
+            for i in unique_classes:
+                n = (detections[:, -1].cpu() == i).sum()
+                print('%g %ss' % (n, classes[int(i)]))
 
-            # The amount of padding that was added
-            pad_x = max(img.shape[0] - img.shape[1], 0) * (img_size / max(img.shape))
-            pad_y = max(img.shape[1] - img.shape[0], 0) * (img_size / max(img.shape))
-            # Image height and width after padding is removed
-            unpad_h = img_size - pad_y
-            unpad_w = img_size - pad_x
+            for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
+                # Rescale coordinates to original dimensions
+                box_h = ((y2 - y1) / unpad_h) * img.shape[0]
+                box_w = ((x2 - x1) / unpad_w) * img.shape[1]
+                y1 = (((y1 - pad_y // 2) / unpad_h) * img.shape[0]).round().item()
+                x1 = (((x1 - pad_x // 2) / unpad_w) * img.shape[1]).round().item()
+                x2 = (x1 + box_w).round().item()
+                y2 = (y1 + box_h).round().item()
+                x1, y1, x2, y2 = max(x1, 0), max(y1, 0), max(x2, 0), max(y2, 0)
 
-            unique_classes = detections[:, -1].cpu().unique()
-            bbox_colors = random.sample(color_list, len(unique_classes))
+                # write to file
+                if save_txt:
+                    with open(results_txt_path, 'a') as file:
+                        file.write(('%g %g %g %g %g %g \n') % (x1, y1, x2, y2, cls_pred, cls_conf * conf))
 
-            # write results to .txt file
-            results_img_path = os.path.join(output, path.split('/')[-1])
-            results_txt_path = results_img_path + '.txt'
-            if os.path.isfile(results_txt_path):
-                os.remove(results_txt_path)
-
-            for i in unique_classes:
-                n = (detections[:, -1].cpu() == i).sum()
-                print('%g %ss' % (n, classes[int(i)]))
-
-            for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
-                # Rescale coordinates to original dimensions
-                box_h = ((y2 - y1) / unpad_h) * img.shape[0]
-                box_w = ((x2 - x1) / unpad_w) * img.shape[1]
-                y1 = (((y1 - pad_y // 2) / unpad_h) * img.shape[0]).round().item()
-                x1 = (((x1 - pad_x // 2) / unpad_w) * img.shape[1]).round().item()
-                x2 = (x1 + box_w).round().item()
-                y2 = (y1 + box_h).round().item()
-                x1, y1, x2, y2 = max(x1, 0), max(y1, 0), max(x2, 0), max(y2, 0)
-
-                # write to file
-                if save_txt:
-                    with open(results_txt_path, 'a') as file:
-                        file.write(('%g %g %g %g %g %g \n') % (x1, y1, x2, y2, cls_pred, cls_conf * conf))
+                if save_images:
+                    # Add the bbox to the plot
+                    label = '%s %.2f' % (classes[int(cls_pred)], conf)
+                    color = bbox_colors[int(np.where(unique_classes == int(cls_pred))[0])]
+                    plot_one_box([x1, y1, x2, y2], img, label=label, color=color)
 
             if save_images:
-                # Add the bbox to the plot
-                label = '%s %.2f' % (classes[int(cls_pred)], conf)
-                color = bbox_colors[int(np.where(unique_classes == int(cls_pred))[0])]
-                plot_one_box([x1, y1, x2, y2], img, label=label, color=color)
+                # Save generated image with detections
+                cv2.imwrite(results_img_path.replace('.bmp', '.jpg').replace('.tif', '.jpg'), img)
 
-        if save_images:
-            # Save generated image with detections
-            cv2.imwrite(results_img_path.replace('.bmp', '.jpg').replace('.tif', '.jpg'), img)
+        print('Done. (%.3fs)\n' % (time.time() - t))
 
     if platform == 'darwin':  # MacOS (local)
         os.system('open ' + output)
 
@@ -138,32 +120,20 @@ if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('--image-folder', type=str, default='data/samples', help='path to images')
     parser.add_argument('--output-folder', type=str, default='output', help='path to outputs')
-    parser.add_argument('--plot-flag', type=bool, default=True)
-    parser.add_argument('--txt-out', type=bool, default=False)
     parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
-    parser.add_argument('--data-config', type=str, default='cfg/coco.data', help='path to data config file')
     parser.add_argument('--weights', type=str, default='weights/yolov3.pt', help='path to weights file')
    parser.add_argument('--conf-thres', type=float, default=0.50, help='object confidence threshold')
     parser.add_argument('--nms-thres', type=float, default=0.45, help='iou threshold for non-maximum suppression')
-    parser.add_argument('--batch-size', type=int, default=1, help='size of the batches')
     parser.add_argument('--img-size', type=int, default=32 * 13, help='size of each image dimension')
     opt = parser.parse_args()
     print(opt)
 
-    torch.cuda.empty_cache()
-
-    init_seeds()
-
     detect(
         opt.cfg,
-        opt.data_config,
         opt.weights,
         opt.image_folder,
         output=opt.output_folder,
-        batch_size=opt.batch_size,
         img_size=opt.img_size,
         conf_thres=opt.conf_thres,
         nms_thres=opt.nms_thres,
-        save_txt=opt.txt_out,
-        save_images=opt.plot_flag,
     )
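Note on the box math above: detect.py now rescales predictions from the letterboxed network input back onto the original image (img0) instead of re-reading the file with cv2.imread. A minimal standalone sketch of that reversal, assuming a square img_size input; the helper name unletterbox is hypothetical, not part of this patch:

    def unletterbox(x1, y1, x2, y2, shape, img_size=416):
        # shape = (height, width) of the original image; the box coords come
        # from a padded-square ('letterbox') resize to img_size x img_size
        h, w = shape
        pad_x = max(h - w, 0) * (img_size / max(h, w))  # width padding added
        pad_y = max(w - h, 0) * (img_size / max(h, w))  # height padding added
        unpad_h, unpad_w = img_size - pad_y, img_size - pad_x
        box_w = (x2 - x1) / unpad_w * w  # box size in original pixels
        box_h = (y2 - y1) / unpad_h * h
        x1 = (x1 - pad_x // 2) / unpad_w * w  # shift out the left/top padding
        y1 = (y1 - pad_y // 2) / unpad_h * h
        return x1, y1, x1 + box_w, y1 + box_h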
diff --git a/models.py b/models.py
index 34c573e3..29a01eaf 100755
--- a/models.py
+++ b/models.py
@@ -334,17 +334,17 @@ class Darknet(nn.Module):
         return sum(output) if is_training else torch.cat(output, 1)
 
 
-def load_darknet_weights(self, weights_path, cutoff=-1):
-    # Parses and loads the weights stored in 'weights_path'
+def load_darknet_weights(self, weights, cutoff=-1):
+    # Parses and loads the weights stored in 'weights'
     # cutoff: save layers between 0 and cutoff (if cutoff = -1 all are saved)
-    weights_file = weights_path.split(os.sep)[-1]
+    weights_file = weights.split(os.sep)[-1]
 
     # Try to download weights if not available locally
-    if not os.path.isfile(weights_path):
+    if not os.path.isfile(weights):
         try:
-            os.system('wget https://pjreddie.com/media/files/' + weights_file + ' -P ' + weights_path)
+            os.system('wget https://pjreddie.com/media/files/' + weights_file + ' -P ' + weights)
         except:
-            assert os.path.isfile(weights_path)
+            assert os.path.isfile(weights)
 
     # Establish cutoffs
     if weights_file == 'darknet53.conv.74':
@@ -353,7 +353,7 @@ def load_darknet_weights(self, weights_path, cutoff=-1):
         cutoff = 16
 
     # Open the weights file
-    fp = open(weights_path, 'rb')
+    fp = open(weights, 'rb')
     header = np.fromfile(fp, dtype=np.int32, count=5)  # First five are header values
 
     # Needed to write header when saving weights
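Note on the weights format: load_darknet_weights assumes the darknet binary layout, a short int32 header (read above with count=5) followed by one flat float32 buffer that is consumed layer by layer. A minimal sketch of reading that layout; the file path is illustrative:

    import numpy as np

    with open('weights/darknet53.conv.74', 'rb') as fp:
        header = np.fromfile(fp, dtype=np.int32, count=5)  # version fields + images-seen counter
        params = np.fromfile(fp, dtype=np.float32)  # all conv/BN parameters, in layer order
    print(header, params.size)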
diff --git a/test.py b/test.py
index fafd816d..bcb54f99 100644
--- a/test.py
+++ b/test.py
@@ -8,34 +8,32 @@ from utils import torch_utils
 
 
 def test(
-        net_config_path,
-        data_config_path,
-        weights_path,
+        cfg,
+        data_cfg,
+        weights,
         batch_size=16,
         img_size=416,
         iou_thres=0.5,
         conf_thres=0.3,
         nms_thres=0.45,
-        n_cpus=0,
 ):
     device = torch_utils.select_device()
-    print("Using device: \"{}\"".format(device))
 
     # Configure run
-    data_config = parse_data_config(data_config_path)
-    nC = int(data_config['classes'])  # number of classes (80 for COCO)
-    test_path = data_config['valid']
+    data_cfg = parse_data_cfg(data_cfg)
+    nC = int(data_cfg['classes'])  # number of classes (80 for COCO)
+    test_path = data_cfg['valid']
 
     # Initiate model
-    model = Darknet(net_config_path, img_size)
+    model = Darknet(cfg, img_size)
 
     # Load weights
-    if weights_path.endswith('.pt'):  # pytorch format
-        checkpoint = torch.load(weights_path, map_location='cpu')
+    if weights.endswith('.pt'):  # pytorch format
+        checkpoint = torch.load(weights, map_location='cpu')
         model.load_state_dict(checkpoint['model'])
         del checkpoint
     else:  # darknet format
-        load_darknet_weights(model, weights_path)
+        load_darknet_weights(model, weights)
 
     model.to(device).eval()
 
@@ -118,7 +116,7 @@ def test(
 
     # Print mAP per class
     print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP') + '\n\nmAP Per Class:')
-    classes = load_classes(data_config['names'])  # Extracts class labels from file
+    classes = load_classes(data_cfg['names'])  # Extracts class labels from file
     for i, c in enumerate(classes):
         print('%15s: %-.4f' % (c, AP_accum[i] / AP_accum_count[i]))
 
@@ -130,12 +128,11 @@ if __name__ == '__main__':
     parser = argparse.ArgumentParser(prog='test.py')
     parser.add_argument('--batch-size', type=int, default=32, help='size of each image batch')
     parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='path to model config file')
-    parser.add_argument('--data-config', type=str, default='cfg/coco.data', help='path to data config file')
+    parser.add_argument('--data-cfg', type=str, default='cfg/coco.data', help='path to data config file')
     parser.add_argument('--weights', type=str, default='weights/yolov3.pt', help='path to weights file')
     parser.add_argument('--iou-thres', type=float, default=0.5, help='iou threshold required to qualify as detected')
     parser.add_argument('--conf-thres', type=float, default=0.3, help='object confidence threshold')
     parser.add_argument('--nms-thres', type=float, default=0.45, help='iou threshold for non-maximum suppression')
-    parser.add_argument('--n-cpus', type=int, default=0, help='number of cpu threads to use during batch generation')
     parser.add_argument('--img-size', type=int, default=416, help='size of each image dimension')
     opt = parser.parse_args()
     print(opt, end='\n\n')
@@ -144,12 +141,11 @@ if __name__ == '__main__':
 
     mAP = test(
         opt.cfg,
-        opt.data_config,
+        opt.data_cfg,
         opt.weights,
         batch_size=opt.batch_size,
         img_size=opt.img_size,
         iou_thres=opt.iou_thres,
         conf_thres=opt.conf_thres,
         nms_thres=opt.nms_thres,
-        n_cpus=opt.n_cpus,
     )
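Note on parse_data_cfg (renamed from parse_data_config): it reads a darknet-style key=value file into a dict; test.py uses the 'classes', 'valid' and 'names' keys and train.py uses 'train'. An illustrative cfg/coco.data, with example paths rather than the repo's actual contents:

    classes=80
    train=data/coco/trainvalno5k.txt
    valid=data/coco/5k.txt
    names=data/coco.names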
diff --git a/train.py b/train.py
index 6ab18406..f517f5b2 100644
--- a/train.py
+++ b/train.py
@@ -12,38 +12,37 @@ import test
 
 
 def train(
-        net_config_path,
-        data_config_path,
+        cfg,
+        data_cfg,
         img_size=416,
         resume=False,
         epochs=100,
         batch_size=16,
         accumulated_batches=1,
-        weights_path='weights',
+        weights='weights',
         report=False,
         multi_scale=False,
         freeze_backbone=True,
         var=0,
 ):
     device = torch_utils.select_device()
-    print("Using device: \"{}\"".format(device))
 
     if multi_scale:  # pass maximum multi_scale size
         img_size = 608
     else:
         torch.backends.cudnn.benchmark = True
 
-    os.makedirs(weights_path, exist_ok=True)
-    latest_weights_file = os.path.join(weights_path, 'latest.pt')
-    best_weights_file = os.path.join(weights_path, 'best.pt')
+    os.makedirs(weights, exist_ok=True)
+    latest_weights_file = os.path.join(weights, 'latest.pt')
+    best_weights_file = os.path.join(weights, 'best.pt')
 
     # Configure run
-    data_config = parse_data_config(data_config_path)
-    num_classes = int(data_config['classes'])
-    train_path = data_config['train']
+    data_cfg = parse_data_cfg(data_cfg)
+    num_classes = int(data_cfg['classes'])
+    train_path = data_cfg['train']
 
     # Initialize model
-    model = Darknet(net_config_path, img_size)
+    model = Darknet(cfg, img_size)
 
     # Get dataloader
     dataloader = load_images_and_labels(train_path, batch_size=batch_size, img_size=img_size,
@@ -80,7 +79,7 @@ def train(
         best_loss = float('inf')
 
         # Initialize model with darknet53 weights (optional)
-        load_darknet_weights(model, os.path.join(weights_path, 'darknet53.conv.74'))
+        load_darknet_weights(model, os.path.join(weights, 'darknet53.conv.74'))
 
     if torch.cuda.device_count() > 1:
         raise Exception('Multi-GPU not currently supported: https://github.com/ultralytics/yolov3/issues/21')
@@ -191,24 +190,16 @@ def train(
 
         # Save best checkpoint
         if best_loss == loss_per_target:
-            os.system('cp {} {}'.format(
-                latest_weights_file,
-                best_weights_file,
-            ))
+            os.system('cp ' + latest_weights_file + ' ' + best_weights_file)
 
         # Save backup weights every 5 epochs
         if (epoch > 0) & (epoch % 5 == 0):
-            backup_file_name = 'backup{}.pt'.format(epoch)
-            backup_file_path = os.path.join(weights_path, backup_file_name)
-            os.system('cp {} {}'.format(
-                latest_weights_file,
-                backup_file_path,
-            ))
+            os.system('cp ' + latest_weights_file + ' ' + os.path.join(weights, 'backup{}.pt'.format(epoch)))
 
         # Calculate mAP
         mAP, R, P = test.test(
-            net_config_path,
-            data_config_path,
+            cfg,
+            data_cfg,
             latest_weights_file,
             batch_size=batch_size,
             img_size=img_size,
@@ -224,11 +215,11 @@ if __name__ == '__main__':
     parser.add_argument('--epochs', type=int, default=100, help='number of epochs')
     parser.add_argument('--batch-size', type=int, default=16, help='size of each image batch')
     parser.add_argument('--accumulated-batches', type=int, default=1, help='number of batches before optimizer step')
-    parser.add_argument('--data-config', type=str, default='cfg/coco.data', help='path to data config file')
+    parser.add_argument('--data-cfg', type=str, default='cfg/coco.data', help='path to data config file')
     parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
     parser.add_argument('--multi-scale', action='store_true', help='random image sizes per batch 320 - 608')
     parser.add_argument('--img-size', type=int, default=32 * 13, help='pixels')
-    parser.add_argument('--weights-path', type=str, default='weights', help='path to store weights')
+    parser.add_argument('--weights', type=str, default='weights', help='path to store weights')
     parser.add_argument('--resume', action='store_true', help='resume training flag')
     parser.add_argument('--report', action='store_true', help='report TP, FP, FN, P and R per batch (slower)')
     parser.add_argument('--freeze', action='store_true', help='freeze darknet53.conv.74 layers for first epoch')
@@ -241,13 +232,13 @@ if __name__ == '__main__':
     torch.cuda.empty_cache()
     train(
         opt.cfg,
-        opt.data_config,
+        opt.data_cfg,
         img_size=opt.img_size,
         resume=opt.resume,
         epochs=opt.epochs,
         batch_size=opt.batch_size,
         accumulated_batches=opt.accumulated_batches,
-        weights_path=opt.weights_path,
+        weights=opt.weights,
         report=opt.report,
         multi_scale=opt.multi_scale,
         freeze_backbone=opt.freeze,
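Note on the checkpoint copies: os.system('cp ...') assumes a POSIX shell. A portable alternative using only the standard library, offered as a suggestion rather than part of this patch:

    import shutil

    shutil.copyfile(latest_weights_file, best_weights_file)  # replaces os.system('cp ' + ...)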
diff --git a/utils/datasets.py b/utils/datasets.py
index 5ec67a44..b8f52d99 100755
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -13,7 +13,7 @@ from utils.utils import xyxy2xywh
 
 
 class load_images():  # for inference
-    def __init__(self, path, batch_size=1, img_size=416):
+    def __init__(self, path, img_size=416):
         if os.path.isdir(path):
             image_format = ['.jpg', '.jpeg', '.png', '.tif']
             self.files = sorted(glob.glob('%s/*.*' % path))
@@ -22,43 +22,37 @@ class load_images():  # for inference
             self.files = [path]
 
         self.nF = len(self.files)  # number of image files
-        self.nB = math.ceil(self.nF / batch_size)  # number of batches
-        self.batch_size = batch_size
         self.height = img_size
 
         assert self.nF > 0, 'No images found in path %s' % path
 
-        # RGB normalization values
-        # self.rgb_mean = np.array([60.134, 49.697, 40.746], dtype=np.float32).reshape((3, 1, 1))
-        # self.rgb_std = np.array([29.99, 24.498, 22.046], dtype=np.float32).reshape((3, 1, 1))
-
     def __iter__(self):
         self.count = -1
         return self
 
     def __next__(self):
         self.count += 1
-        if self.count == self.nB:
+        if self.count == self.nF:
             raise StopIteration
         img_path = self.files[self.count]
 
         # Read image
-        img = cv2.imread(img_path)  # BGR
+        img0 = cv2.imread(img_path)  # BGR
+        assert img0 is not None, 'Failed to load ' + img_path
 
         # Padded resize
-        img, _, _, _ = resize_square(img, height=self.height, color=(127.5, 127.5, 127.5))
+        img, _, _, _ = resize_square(img0, height=self.height, color=(127.5, 127.5, 127.5))
 
         # Normalize RGB
         img = img[:, :, ::-1].transpose(2, 0, 1)
         img = np.ascontiguousarray(img, dtype=np.float32)
-        # img -= self.rgb_mean
-        # img /= self.rgb_std
         img /= 255.0
 
-        return [img_path], img
+        # cv2.imwrite(img_path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1])  # save letterbox image
+        return img_path, img, img0
 
     def __len__(self):
-        return self.nB  # number of batches
+        return self.nF  # number of files
 
 
 class load_images_and_labels():  # for training
@@ -81,10 +75,6 @@ class load_images_and_labels():  # for training
 
         assert self.nB > 0, 'No images found in path %s' % path
 
-        # RGB normalization values
-        # self.rgb_mean = np.array([60.134, 49.697, 40.746], dtype=np.float32).reshape((1, 3, 1, 1))
-        # self.rgb_std = np.array([29.99, 24.498, 22.046], dtype=np.float32).reshape((1, 3, 1, 1))
-
     def __iter__(self):
         self.count = -1
         self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF)
@@ -191,8 +181,6 @@ class load_images_and_labels():  # for training
         # Normalize
         img_all = np.stack(img_all)[:, :, :, ::-1].transpose(0, 3, 1, 2)  # BGR to RGB and cv2 to pytorch
         img_all = np.ascontiguousarray(img_all, dtype=np.float32)
-        # img_all -= self.rgb_mean
-        # img_all /= self.rgb_std
         img_all /= 255.0
 
         return torch.from_numpy(img_all), labels_all
diff --git a/utils/parse_config.py b/utils/parse_config.py
index 9dc03585..dae59196 100644
--- a/utils/parse_config.py
+++ b/utils/parse_config.py
@@ -20,7 +20,7 @@ def parse_model_config(path):
     return module_defs
 
 
-def parse_data_config(path):
+def parse_data_cfg(path):
     """Parses the data configuration file"""
     options = dict()
     options['gpus'] = '0,1,2,3'
diff --git a/utils/torch_utils.py b/utils/torch_utils.py
index 11a09627..19197eac 100644
--- a/utils/torch_utils.py
+++ b/utils/torch_utils.py
@@ -21,4 +21,5 @@ def select_device(force_cpu=False):
         device = torch.device('cpu')
     else:
         device = torch.device('cuda:0' if CUDA_AVAILABLE else 'cpu')
+    print('Using ' + str(device) + '\n')
     return device
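With this change load_images yields one image per iteration instead of batches: (path, img, img0), where img is the letterboxed, RGB, CHW, 0-1 float32 array fed to the network and img0 is the original BGR image used for plotting and saving. A usage sketch:

    from utils.datasets import load_images

    for path, img, img0 in load_images('data/samples', img_size=416):
        print(path, img.shape, img0.shape)  # e.g. (3, 416, 416) and (H, W, 3)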