"""Multi-GPU update with a custom collate function to allow a variable-size
target vector per image without needing to pad targets."""
import glob
import random
from collections import defaultdict

import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from utils import torch_utils

# Set printoptions
torch.set_printoptions(linewidth=1320, precision=5, profile='long')
np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format})  # format short g, %precision=5

# Prevent OpenCV from multithreading (to use PyTorch DataLoader)
cv2.setNumThreads(0)


def float3(x):  # format floats to 3 decimals
    return float(format(x, '.3f'))


def init_seeds(seed=0):
    random.seed(seed)
    np.random.seed(seed)
    torch_utils.init_seeds(seed=seed)


def load_classes(path):
    # Loads class labels at 'path'; the context manager ensures the file is closed
    with open(path, 'r') as fp:
        names = fp.read().split('\n')
    return list(filter(None, names))  # filter removes empty strings (such as last line)


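# Usage sketch (assumes a plain-text file with one class name per line, such as
# the 'data/coco.names' file referenced below; the path is illustrative):
# names = load_classes('data/coco.names')
# print(names[0])  # 'person' for the standard COCO names file

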
def model_info(model):
    # Prints a line-by-line description of a PyTorch model
    n_p = sum(x.numel() for x in model.parameters())  # number parameters
    n_g = sum(x.numel() for x in model.parameters() if x.requires_grad)  # number gradients
    print('\n%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
    for i, (name, p) in enumerate(model.named_parameters()):
        name = name.replace('module_list.', '')
        print('%5g %40s %9s %12g %20s %10.3g %10.3g' % (
            i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
    print('Model Summary: %g layers, %g parameters, %g gradients' % (i + 1, n_p, n_g))


def coco_class_weights():  # frequency of each class in coco train2014
    weights = 1 / torch.FloatTensor(
        [187437, 4955, 30920, 6033, 3838, 4332, 3160, 7051, 7677, 9167, 1316, 1372, 833, 6757, 7355, 3302, 3776, 4671,
         6769, 5706, 3908, 903, 3686, 3596, 6200, 7920, 8779, 4505, 4272, 1862, 4698, 1962, 4403, 6659, 2402, 2689,
         4012, 4175, 3411, 17048, 5637, 14553, 3923, 5539, 4289, 10084, 7018, 4314, 3099, 4638, 4939, 5543, 2038, 4004,
         5053, 4578, 27292, 4113, 5931, 2905, 11174, 2873, 4036, 3415, 1517, 4122, 1980, 4464, 1190, 2302, 156, 3933,
         1877, 17630, 4337, 4624, 1075, 3468, 135, 1380])
    weights /= weights.sum()
    return weights


def coco80_to_coco91_class():  # converts 80-index (val2014) to 91-index (paper)
    # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
    # a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n')
    # b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n')
    # x = [list(a[i] == b).index(True) + 1 for i in range(80)]  # darknet to coco
    x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
         35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
         64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
    return x


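# Usage sketch: map an 80-index class id (model output order) to the 91-index
# COCO paper category id; class 0 ('person') maps to category 1:
# coco91 = coco80_to_coco91_class()
# assert coco91[0] == 1

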
def plot_one_box(x, img, color=None, label=None, line_thickness=None):
    # Plots one bounding box on image img
    tl = line_thickness or round(0.002 * max(img.shape[0:2])) + 1  # line thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl)
    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(img, c1, c2, color, -1)  # filled
        cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)


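# Usage sketch (blank canvas and box coordinates chosen for illustration):
# img = np.zeros((416, 416, 3), dtype=np.uint8)
# plot_one_box([100, 100, 200, 250], img, label='person', color=(0, 255, 0))
# cv2.imwrite('example.jpg', img)

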
def weights_init_normal(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        torch.nn.init.normal_(m.weight.data, 0.0, 0.03)
    elif classname.find('BatchNorm2d') != -1:
        torch.nn.init.normal_(m.weight.data, 1.0, 0.03)
        torch.nn.init.constant_(m.bias.data, 0.0)


def xyxy2xywh(x):
    # Convert bounding box format from [x1, y1, x2, y2] to [x, y, w, h]
    y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
    y[:, 0] = (x[:, 0] + x[:, 2]) / 2
    y[:, 1] = (x[:, 1] + x[:, 3]) / 2
    y[:, 2] = x[:, 2] - x[:, 0]
    y[:, 3] = x[:, 3] - x[:, 1]
    return y


def xywh2xyxy(x):
    # Convert bounding box format from [x, y, w, h] to [x1, y1, x2, y2]
    y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2
    y[:, 1] = x[:, 1] - x[:, 3] / 2
    y[:, 2] = x[:, 0] + x[:, 2] / 2
    y[:, 3] = x[:, 1] + x[:, 3] / 2
    return y


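# Sanity-check sketch: the two conversions are inverses (values illustrative):
# b = torch.tensor([[50., 50., 20., 40.]])  # xywh
# xywh2xyxy(b)  # -> [[40., 30., 60., 70.]]
# assert torch.allclose(xyxy2xywh(xywh2xyxy(b)), b)

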
def scale_coords(img_size, coords, img0_shape):
    # Rescale x1, y1, x2, y2 from the letterboxed img_size back to the original image shape
    gain = float(img_size) / max(img0_shape)  # gain = old / new
    pad_x = (img_size - img0_shape[1] * gain) / 2  # width padding
    pad_y = (img_size - img0_shape[0] * gain) / 2  # height padding
    coords[:, [0, 2]] -= pad_x
    coords[:, [1, 3]] -= pad_y
    coords[:, :4] /= gain
    coords[:, :4] = torch.clamp(coords[:, :4], min=0)
    return coords


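# Worked sketch (coordinates illustrative): a 208x416 source image letterboxed
# to 416x416 gets 104 px of vertical padding (gain = 1), so boxes map back by
# subtracting the pad:
# boxes = torch.tensor([[0., 104., 416., 312.]])
# scale_coords(416, boxes, (208, 416))  # -> [[0., 0., 416., 208.]]

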
def ap_per_class(tp, conf, pred_cls, target_cls):
    """ Compute the average precision, given the recall and precision curves.
    Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
    # Arguments
        tp: True positives (list).
        conf: Objectness value from 0-1 (list).
        pred_cls: Predicted object classes (list).
        target_cls: True object classes (list).
    # Returns
        The average precision as computed in py-faster-rcnn.
    """

    # Sort by objectness
    i = np.argsort(-conf)
    tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]

    # Find unique classes
    unique_classes = np.unique(np.concatenate((pred_cls, target_cls), 0))

    # Create Precision-Recall curve and compute AP for each class
    ap, p, r = [], [], []
    for c in unique_classes:
        i = pred_cls == c
        n_gt = sum(target_cls == c)  # Number of ground truth objects
        n_p = sum(i)  # Number of predicted objects

        if (n_p == 0) and (n_gt == 0):
            continue
        elif (n_p == 0) or (n_gt == 0):
            ap.append(0)
            r.append(0)
            p.append(0)
        else:
            # Accumulate FPs and TPs
            fpc = np.cumsum(1 - tp[i])
            tpc = np.cumsum(tp[i])

            # Recall
            recall_curve = tpc / (n_gt + 1e-16)
            r.append(tpc[-1] / (n_gt + 1e-16))

            # Precision
            precision_curve = tpc / (tpc + fpc)
            p.append(tpc[-1] / (tpc[-1] + fpc[-1]))

            # AP from recall-precision curve
            ap.append(compute_ap(recall_curve, precision_curve))

    return np.array(ap), unique_classes.astype('int32'), np.array(r), np.array(p)


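# Toy sketch (arrays illustrative): two class-0 predictions, one a true positive
# matching the single class-0 ground truth, yield AP = 1.0 because the true
# positive is ranked first by confidence:
# ap, cls, r, p = ap_per_class(tp=np.array([1, 0]), conf=np.array([0.9, 0.8]),
#                              pred_cls=np.array([0, 0]), target_cls=np.array([0]))

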
def compute_ap(recall, precision):
    """ Compute the average precision, given the recall and precision curves.
    Source: https://github.com/rbgirshick/py-faster-rcnn.
    # Arguments
        recall: The recall curve (list).
        precision: The precision curve (list).
    # Returns
        The average precision as computed in py-faster-rcnn.
    """
    # correct AP calculation
    # first append sentinel values at the end
    mrec = np.concatenate(([0.], recall, [1.]))
    mpre = np.concatenate(([0.], precision, [0.]))

    # compute the precision envelope
    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

    # to calculate area under PR curve, look for points
    # where X axis (recall) changes value
    i = np.where(mrec[1:] != mrec[:-1])[0]

    # and sum (\Delta recall) * prec
    ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap


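# Worked sketch (illustrative curve): recall [0.5, 1.0] with precision
# [1.0, 0.5] integrates to 0.5 * 1.0 + 0.5 * 0.5 = 0.75:
# compute_ap(np.array([0.5, 1.0]), np.array([1.0, 0.5]))  # -> 0.75

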
def bbox_iou(box1, box2, x1y1x2y2=True):
    # Returns the IoU of box1 to box2. box1 is 4, box2 is nx4
    box2 = box2.t()

    # Get the coordinates of bounding boxes
    if x1y1x2y2:
        # x1, y1, x2, y2 = box1
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
    else:
        # x, y, w, h = box1
        b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
        b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
        b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
        b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2

    # Intersection area
    inter_area = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
                 (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)

    # Union Area
    union_area = ((b1_x2 - b1_x1) * (b1_y2 - b1_y1) + 1e-16) + \
                 (b2_x2 - b2_x1) * (b2_y2 - b2_y1) - inter_area

    return inter_area / union_area  # iou


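# Worked sketch (illustrative boxes): two 10x10 boxes overlapping in a 5x5
# region intersect in 25 and unite in 175, giving IoU = 25 / 175 ≈ 0.143:
# bbox_iou(torch.tensor([0., 0., 10., 10.]), torch.tensor([[5., 5., 15., 15.]]))

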
def wh_iou(box1, box2):
    # Returns the IoU of wh1 to wh2. wh1 is 2, wh2 is nx2
    box2 = box2.t()

    # w, h = box1
    w1, h1 = box1[0], box1[1]
    w2, h2 = box2[0], box2[1]

    # Intersection area
    inter_area = torch.min(w1, w2) * torch.min(h1, h2)

    # Union Area
    union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area

    return inter_area / union_area  # iou


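# Worked sketch (illustrative sizes): wh_iou treats boxes as corner-aligned, so
# a 4x6 box scored against a 2x3 and an identical 4x6 gives [0.25, 1.0]:
# wh_iou(torch.tensor([4., 6.]), torch.tensor([[2., 3.], [4., 6.]]))

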
def compute_loss(p, targets):  # predictions, targets
    FT = torch.cuda.FloatTensor if p[0].is_cuda else torch.FloatTensor
    loss, lxy, lwh, lcls, lconf = FT([0]), FT([0]), FT([0]), FT([0]), FT([0])
    txy, twh, tcls, indices = targets
    MSE = nn.MSELoss()
    CE = nn.CrossEntropyLoss()
    BCE = nn.BCEWithLogitsLoss()

    # Compute losses
    # gp = [x.numel() for x in tconf]  # grid points
    for i, pi0 in enumerate(p):  # layer i predictions
        b, a, gj, gi = indices[i]  # image, anchor, gridx, gridy
        tconf = torch.zeros_like(pi0[..., 0])  # conf

        # Compute losses
        k = 1  # nT / bs
        if len(b) > 0:
            pi = pi0[b, a, gj, gi]  # predictions closest to anchors
            tconf[b, a, gj, gi] = 1  # conf

            lxy += k * MSE(torch.sigmoid(pi[..., 0:2]), txy[i])  # xy
            lwh += k * MSE(pi[..., 2:4], twh[i])  # wh
            lcls += (k / 4) * CE(pi[..., 5:], tcls[i])

        # pos_weight = FT([gp[i] / min(gp) * 4.])
        # BCE = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
        lconf += (k * 64) * BCE(pi0[..., 4], tconf)
    loss = lxy + lwh + lconf + lcls

    # Add to dictionary
    d = defaultdict(float)
    losses = [loss.item(), lxy.item(), lwh.item(), lconf.item(), lcls.item()]
    for name, x in zip(['total', 'xy', 'wh', 'conf', 'cls'], losses):
        d[name] = x

    return loss, d


def build_targets(model, targets):
    # targets = [image, class, x, y, w, h]
    if isinstance(model, nn.parallel.DistributedDataParallel):
        model = model.module

    txy, twh, tcls, indices = [], [], [], []
    for i, layer in enumerate(get_yolo_layers(model)):
        nG = model.module_list[layer][0].nG  # grid size
        anchor_vec = model.module_list[layer][0].anchor_vec

        # iou of targets-anchors
        gwh = targets[:, 4:6] * nG
        iou = [wh_iou(x, gwh) for x in anchor_vec]
        iou, a = torch.stack(iou, 0).max(0)  # best iou and anchor

        # reject below threshold ious (OPTIONAL)
        reject = True
        if reject:
            j = iou > 0.01
            t, a, gwh = targets[j], a[j], gwh[j]
        else:
            t = targets

        # Indices
        b, c = t[:, 0:2].long().t()  # target image, class
        gxy = t[:, 2:4] * nG
        gi, gj = gxy.long().t()  # grid_i, grid_j
        indices.append((b, a, gj, gi))

        # XY coordinates
        txy.append(gxy - gxy.floor())

        # Width and height
        twh.append(torch.log(gwh / anchor_vec[a]))  # yolo method
        # twh.append(torch.sqrt(gwh / anchor_vec[a]) / 2)  # power method

        # Class
        tcls.append(c)

    return txy, twh, tcls, indices


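# Training-loop sketch (assumes 'model', 'imgs' and raw 'targets' come from the
# surrounding training code; all names here are illustrative):
# target_list = build_targets(model, targets)
# pred = model(imgs)
# loss, loss_dict = compute_loss(pred, target_list)
# loss.backward()

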
def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
    """
    Removes detections with an object confidence score lower than 'conf_thres',
    then applies Non-Maximum Suppression to further filter detections.
    Returns detections with shape:
        (x1, y1, x2, y2, object_conf, class_score, class_pred)
    """

    output = [None for _ in range(len(prediction))]
    for image_i, pred in enumerate(prediction):
        # Experiment: Prior class size rejection
        # x, y, w, h = pred[:, 0], pred[:, 1], pred[:, 2], pred[:, 3]
        # a = w * h  # area
        # ar = w / (h + 1e-16)  # aspect ratio
        # n = len(w)
        # log_w, log_h, log_a, log_ar = torch.log(w), torch.log(h), torch.log(a), torch.log(ar)
        # shape_likelihood = np.zeros((n, 60), dtype=np.float32)
        # x = np.concatenate((log_w.reshape(-1, 1), log_h.reshape(-1, 1)), 1)
        # from scipy.stats import multivariate_normal
        # for c in range(60):
        #     shape_likelihood[:, c] = \
        #         multivariate_normal.pdf(x, mean=mat['class_mu'][c, :2], cov=mat['class_cov'][c, :2, :2])

        # Filter out confidence scores below threshold
        class_prob, class_pred = torch.max(F.softmax(pred[:, 5:], 1), 1)
        v = pred[:, 4] > conf_thres
        v = v.nonzero().squeeze()
        if len(v.shape) == 0:
            v = v.unsqueeze(0)

        pred = pred[v]
        class_prob = class_prob[v]
        class_pred = class_pred[v]

        # If none are remaining => process next image
        nP = pred.shape[0]
        if not nP:
            continue

        # From (center x, center y, width, height) to (x1, y1, x2, y2)
        pred[:, :4] = xywh2xyxy(pred[:, :4])

        # Detections ordered as (x1, y1, x2, y2, obj_conf, class_prob, class_pred)
        detections = torch.cat((pred[:, :5], class_prob.float().unsqueeze(1), class_pred.float().unsqueeze(1)), 1)
        # Iterate through all predicted classes
        unique_labels = detections[:, -1].cpu().unique().to(prediction.device)

        nms_style = 'OR'  # 'OR' (default), 'AND', 'MERGE' (experimental)
        for c in unique_labels:
            # Get the detections with class c
            dc = detections[detections[:, -1] == c]
            # Sort the detections by maximum object confidence
            _, conf_sort_index = torch.sort(dc[:, 4] * dc[:, 5], descending=True)
            dc = dc[conf_sort_index]

            # Non-maximum suppression
            det_max = []
            ind = list(range(len(dc)))
            if nms_style == 'OR':  # default
                while len(ind):
                    j = ind[0]
                    det_max.append(dc[j:j + 1])  # save highest conf detection
                    reject = bbox_iou(dc[j], dc[ind]) > nms_thres
                    [ind.pop(i) for i in reversed(reject.nonzero())]
                # while dc.shape[0]:  # SLOWER METHOD
                #     det_max.append(dc[:1])  # save highest conf detection
                #     if len(dc) == 1:  # Stop if we're at the last detection
                #         break
                #     iou = bbox_iou(dc[0], dc[1:])  # iou with other boxes
                #     dc = dc[1:][iou < nms_thres]  # remove ious > threshold

                # Image      Total          P          R        mAP
                #  4964       5000      0.629      0.594      0.586

            elif nms_style == 'AND':  # requires overlap, single boxes erased
                while len(dc) > 1:
                    iou = bbox_iou(dc[0], dc[1:])  # iou with other boxes
                    if iou.max() > 0.5:
                        det_max.append(dc[:1])
                    dc = dc[1:][iou < nms_thres]  # remove ious > threshold

            elif nms_style == 'MERGE':  # weighted mixture box
                while len(dc) > 0:
                    iou = bbox_iou(dc[0], dc[0:])  # iou with other boxes
                    i = iou > nms_thres

                    weights = dc[i, 4:5] * dc[i, 5:6]
                    dc[0, :4] = (weights * dc[i, :4]).sum(0) / weights.sum()
                    det_max.append(dc[:1])
                    dc = dc[iou < nms_thres]

                # Image      Total          P          R        mAP
                #  4964       5000      0.633      0.598      0.589  # normal

            if len(det_max) > 0:
                det_max = torch.cat(det_max)
                # Add max detections to outputs
                output[image_i] = det_max if output[image_i] is None else torch.cat((output[image_i], det_max))

    return output


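# Usage sketch (assumes raw network output with columns x, y, w, h, obj_conf
# followed by per-class scores, e.g. nBoxes x 85 for 80 classes; 'model',
# 'imgs' and 'img0' are illustrative names from the surrounding code):
# pred = model(imgs)
# detections = non_max_suppression(pred, conf_thres=0.5, nms_thres=0.4)[0]
# if detections is not None:  # nx7: x1, y1, x2, y2, obj_conf, class_prob, class_pred
#     for *xyxy, obj_conf, class_prob, cls in detections:
#         plot_one_box(xyxy, img0, label='class %d' % int(cls))

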
def get_yolo_layers(model):
    bool_vec = [x['type'] == 'yolo' for x in model.module_defs]
    return [i for i, x in enumerate(bool_vec) if x]  # [82, 94, 106] for yolov3


def strip_optimizer_from_checkpoint(filename='weights/best.pt'):
    # Strip optimizer from *.pt files for lighter files (reduced by 2/3 size)
    a = torch.load(filename, map_location='cpu')
    a['optimizer'] = []
    torch.save(a, filename.replace('.pt', '_lite.pt'))


def coco_class_count(path='../coco/labels/train2014/'):
    # Histogram of occurrences per class
    nC = 80  # number classes
    x = np.zeros(nC, dtype='int32')
    files = sorted(glob.glob('%s/*.*' % path))
    for i, file in enumerate(files):
        labels = np.loadtxt(file, dtype=np.float32).reshape(-1, 5)
        x += np.bincount(labels[:, 0].astype('int32'), minlength=nC)
        print(i, len(files))


def coco_only_people(path='../coco/labels/val2014/'):
    # Find images with only people
    files = sorted(glob.glob('%s/*.*' % path))
    for i, file in enumerate(files):
        labels = np.loadtxt(file, dtype=np.float32).reshape(-1, 5)
        if all(labels[:, 0] == 0):
            print(labels.shape[0], file)


def plot_results(start=0):
    # Plot YOLO training results file 'results.txt'
    # import os; os.system('wget https://storage.googleapis.com/ultralytics/yolov3/results_v3.txt')
    # from utils.utils import *; plot_results()

    fig = plt.figure(figsize=(14, 7))
    s = ['X + Y', 'Width + Height', 'Confidence', 'Classification', 'Total Loss', 'Precision', 'Recall', 'mAP']
    for f in sorted(glob.glob('results*.txt')):
        results = np.loadtxt(f, usecols=[2, 3, 4, 5, 6, 9, 10, 11]).T  # column 11 is mAP
        x = range(1, results.shape[1])
        for i in range(8):
            plt.subplot(2, 4, i + 1)
            plt.plot(results[i, x[start:]], marker='.', label=f)
            plt.title(s[i])
            if i == 0:
                plt.legend()
    fig.tight_layout()