Add collate_fn() to DataLoader (#163)
Multi-GPU update with a custom collate function that allows a variable-size target tensor per image, so targets no longer need to be padded.
This commit is contained in:
+78
-61
@@ -2,11 +2,14 @@ import glob
|
||||
import math
|
||||
import os
|
||||
import random
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
import torch
|
||||
from torch.utils.data import Dataset
|
||||
from tqdm import tqdm
|
||||
|
||||
from utils.utils import xyxy2xywh
|
||||
|
||||
@@ -97,7 +100,7 @@ class LoadImagesAndLabels(Dataset): # for training/testing
|
||||
assert len(self.img_files) > 0, 'No images found in %s' % path
|
||||
self.img_size = img_size
|
||||
self.augment = augment
|
||||
self.label_files = [x.replace('images', 'labels').replace('.png', '.txt').replace('.jpg', '.txt')
|
||||
self.label_files = [x.replace('images', 'labels').replace('.bmp', '.txt').replace('.jpg', '.txt')
|
||||
for x in self.img_files]
|
||||
|
||||
def __len__(self):
|
||||
@@ -136,58 +139,61 @@ class LoadImagesAndLabels(Dataset): # for training/testing
|
||||
img, ratio, padw, padh = letterbox(img, height=self.img_size)
|
||||
|
||||
# Load labels
|
||||
labels = []
|
||||
if os.path.isfile(label_path):
|
||||
with open(label_path, 'r') as file:
|
||||
lines = file.read().splitlines()
|
||||
|
||||
x = np.array([x.split() for x in lines], dtype=np.float32)
|
||||
if x.size is 0:
|
||||
# Empty labels file
|
||||
labels = np.array([])
|
||||
else:
|
||||
if x.size > 0:
|
||||
# Normalized xywh to pixel xyxy format
|
||||
labels = x.copy()
|
||||
labels[:, 1] = ratio * w * (x[:, 1] - x[:, 3] / 2) + padw
|
||||
labels[:, 2] = ratio * h * (x[:, 2] - x[:, 4] / 2) + padh
|
||||
labels[:, 3] = ratio * w * (x[:, 1] + x[:, 3] / 2) + padw
|
||||
labels[:, 4] = ratio * h * (x[:, 2] + x[:, 4] / 2) + padh
|
||||
else:
|
||||
labels = np.array([])
|
||||
|
||||
# Augment image and labels
|
||||
if self.augment:
|
||||
img, labels, M = random_affine(img, labels, degrees=(-5, 5), translate=(0.10, 0.10), scale=(0.90, 1.10))
|
||||
img, labels = random_affine(img, labels, degrees=(-5, 5), translate=(0.10, 0.10), scale=(0.90, 1.10))
|
||||
|
||||
nL = len(labels)
|
||||
if nL > 0:
|
||||
nL = len(labels) # number of labels
|
||||
if nL:
|
||||
# convert xyxy to xywh
|
||||
labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) / self.img_size
|
||||
|
||||
if self.augment:
|
||||
# random left-right flip
|
||||
lr_flip = True
|
||||
if lr_flip & (random.random() > 0.5):
|
||||
if lr_flip and random.random() > 0.5:
|
||||
img = np.fliplr(img)
|
||||
if nL > 0:
|
||||
if nL:
|
||||
labels[:, 1] = 1 - labels[:, 1]
|
||||
|
||||
# random up-down flip
|
||||
ud_flip = False
|
||||
if ud_flip & (random.random() > 0.5):
|
||||
if ud_flip and random.random() > 0.5:
|
||||
img = np.flipud(img)
|
||||
if nL > 0:
|
||||
if nL:
|
||||
labels[:, 2] = 1 - labels[:, 2]
|
||||
|
||||
labels_out = np.zeros((100, 6), dtype=np.float32)
|
||||
if nL > 0:
|
||||
labels_out[:nL, 1:] = labels # max 100 labels per image
|
||||
labels_out = torch.zeros((nL, 6))
|
||||
if nL:
|
||||
labels_out[:, 1:] = torch.from_numpy(labels)
|
||||
|
||||
# Normalize
|
||||
img = img[:, :, ::-1].transpose(2, 0, 1) # list to np.array and BGR to RGB
|
||||
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
|
||||
img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32
|
||||
img /= 255.0 # 0 - 255 to 0.0 - 1.0
|
||||
|
||||
return torch.from_numpy(img), torch.from_numpy(labels_out), img_path, (h, w)
|
||||
return torch.from_numpy(img), labels_out, img_path, (h, w)
|
||||
|
||||
@staticmethod
def collate_fn(batch):
    """Merge per-image samples into one batch with variable-length labels.

    Args:
        batch: sequence of ``(img, labels, path, hw)`` tuples. Each ``labels``
            entry is indexed as a 2-D tensor; its column 0 is overwritten
            in place here.

    Returns:
        ``(imgs, labels, paths, hws)``: the images stacked along a new dim 0,
        all label rows concatenated along dim 0, and the paths / hw entries
        as tuples in batch order.
    """
    imgs, labels, paths, hws = zip(*batch)  # transpose list of samples
    for index, image_labels in enumerate(labels):
        # Column 0 records which image of the batch each row belongs to
        # (target image index for build_targets()).
        image_labels[:, 0] = index
    return torch.stack(imgs, 0), torch.cat(labels, 0), paths, hws
|
||||
|
||||
|
||||
def letterbox(img, height=416, color=(127.5, 127.5, 127.5)): # resize a rectangular image to a padded square
|
||||
@@ -203,11 +209,13 @@ def letterbox(img, height=416, color=(127.5, 127.5, 127.5)): # resize a rectang
|
||||
return img, ratio, dw, dh
|
||||
|
||||
|
||||
def random_affine(img, targets=None, degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-2, 2),
|
||||
def random_affine(img, targets=(), degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-2, 2),
|
||||
borderValue=(127.5, 127.5, 127.5)):
|
||||
# torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
|
||||
# https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
|
||||
|
||||
if targets is None:
|
||||
targets = []
|
||||
border = 0 # width of added border (optional)
|
||||
height = max(img.shape[0], img.shape[1]) + border * 2
|
||||
|
||||
@@ -233,52 +241,61 @@ def random_affine(img, targets=None, degrees=(-10, 10), translate=(.1, .1), scal
|
||||
borderValue=borderValue) # BGR order borderValue
|
||||
|
||||
# Return warped points also
|
||||
if targets is not None:
|
||||
if len(targets) > 0:
|
||||
n = targets.shape[0]
|
||||
points = targets[:, 1:5].copy()
|
||||
area0 = (points[:, 2] - points[:, 0]) * (points[:, 3] - points[:, 1])
|
||||
if len(targets) > 0:
|
||||
n = targets.shape[0]
|
||||
points = targets[:, 1:5].copy()
|
||||
area0 = (points[:, 2] - points[:, 0]) * (points[:, 3] - points[:, 1])
|
||||
|
||||
# warp points
|
||||
xy = np.ones((n * 4, 3))
|
||||
xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1
|
||||
xy = (xy @ M.T)[:, :2].reshape(n, 8)
|
||||
# warp points
|
||||
xy = np.ones((n * 4, 3))
|
||||
xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1
|
||||
xy = (xy @ M.T)[:, :2].reshape(n, 8)
|
||||
|
||||
# create new boxes
|
||||
x = xy[:, [0, 2, 4, 6]]
|
||||
y = xy[:, [1, 3, 5, 7]]
|
||||
xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
|
||||
# create new boxes
|
||||
x = xy[:, [0, 2, 4, 6]]
|
||||
y = xy[:, [1, 3, 5, 7]]
|
||||
xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
|
||||
|
||||
# apply angle-based reduction
|
||||
radians = a * math.pi / 180
|
||||
reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
|
||||
x = (xy[:, 2] + xy[:, 0]) / 2
|
||||
y = (xy[:, 3] + xy[:, 1]) / 2
|
||||
w = (xy[:, 2] - xy[:, 0]) * reduction
|
||||
h = (xy[:, 3] - xy[:, 1]) * reduction
|
||||
xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T
|
||||
# apply angle-based reduction
|
||||
radians = a * math.pi / 180
|
||||
reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
|
||||
x = (xy[:, 2] + xy[:, 0]) / 2
|
||||
y = (xy[:, 3] + xy[:, 1]) / 2
|
||||
w = (xy[:, 2] - xy[:, 0]) * reduction
|
||||
h = (xy[:, 3] - xy[:, 1]) * reduction
|
||||
xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T
|
||||
|
||||
# reject warped points outside of image
|
||||
np.clip(xy, 0, height, out=xy)
|
||||
w = xy[:, 2] - xy[:, 0]
|
||||
h = xy[:, 3] - xy[:, 1]
|
||||
area = w * h
|
||||
ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
|
||||
i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10)
|
||||
# reject warped points outside of image
|
||||
np.clip(xy, 0, height, out=xy)
|
||||
w = xy[:, 2] - xy[:, 0]
|
||||
h = xy[:, 3] - xy[:, 1]
|
||||
area = w * h
|
||||
ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
|
||||
i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10)
|
||||
|
||||
targets = targets[i]
|
||||
targets[:, 1:5] = xy[i]
|
||||
targets = targets[i]
|
||||
targets[:, 1:5] = xy[i]
|
||||
|
||||
return imw, targets, M
|
||||
else:
|
||||
return imw
|
||||
return imw, targets
|
||||
|
||||
|
||||
def convert_tif2bmp(p='../xview/val_images_bmp'):
    """Convert every ``*.tif`` image in directory ``p`` to ``*.bmp``.

    Each source file is removed once its ``.bmp`` copy has been written.
    Progress is printed as ``current/total`` for every file.

    Args:
        p: directory scanned (non-recursively) for ``*.tif`` files.
    """
    import glob
    import cv2

    files = sorted(glob.glob('%s/*.tif' % p))
    for i, f in enumerate(files):
        print('%g/%g' % (i + 1, len(files)))
        img = cv2.imread(f)
        if img is None:
            # Unreadable image: keep the source instead of deleting it unconverted.
            continue
        # Swap only the extension; str.replace would also hit '.tif' elsewhere in the path.
        if cv2.imwrite(os.path.splitext(f)[0] + '.bmp', img):
            os.remove(f)  # plain file removal, no shell command built from the path
|
||||
def convert_images2bmp():
    """Write ``*.bmp`` copies of the COCO 2014 jpg images and matching list files.

    For each source folder a sibling ``<name>bmp`` folder is recreated from
    scratch and filled with the converted images. Afterwards each text file in
    the second loop is copied to a ``*5k_bmp.txt`` file with the folder names,
    the extension and a hard-coded absolute path prefix rewritten
    (presumably these are image-path lists; verify against the data files).
    """
    # cv2.imread() jpg at 230 img/s, *.bmp at 400 img/s
    for path in ['../coco/images/val2014/', '../coco/images/train2014/']:
        source = Path(path)
        # Build the sibling folder from path components. A string replace keyed
        # on os.sep can be a no-op, leaving output == path, and the rmtree
        # below would then delete the source images.
        output = source.with_name(source.name + 'bmp')
        if os.path.exists(output):
            shutil.rmtree(output)  # delete output folder
        os.makedirs(output)  # make new output folder

        for f in tqdm(glob.glob('%s*.jpg' % path)):
            save_name = str(output / (Path(f).stem + '.bmp'))
            cv2.imwrite(save_name, cv2.imread(f))

    for label_path in ['../coco/trainvalno5k.txt', '../coco/5k.txt']:
        with open(label_path, 'r') as file:
            lines = file.read()
        lines = lines.replace('2014/', '2014bmp/').replace('.jpg', '.bmp').replace(
            '/Users/glennjocher/PycharmProjects/', '../')
        with open(label_path.replace('5k', '5k_bmp'), 'w') as file:
            file.write(lines)
|
||||
|
||||
+10
-3
@@ -3,13 +3,14 @@
|
||||
# New VM
|
||||
sudo rm -rf yolov3 && git clone https://github.com/ultralytics/yolov3
|
||||
bash yolov3/data/get_coco_dataset.sh
|
||||
bash yolov3/weights/download_yolov3_weights.sh
|
||||
sudo rm -rf cocoapi && git clone https://github.com/cocodataset/cocoapi && cd cocoapi/PythonAPI && make && cd ../.. && cp -r cocoapi/PythonAPI/pycocotools yolov3
|
||||
sudo shutdown
|
||||
|
||||
# Train
|
||||
sudo rm -rf yolov3 && git clone https://github.com/ultralytics/yolov3
|
||||
cp -r weights yolov3
|
||||
cd yolov3 && python3 train.py --batch-size 16 --epochs 1
|
||||
cd yolov3 && python3 train.py --batch-size 48 --epochs 1
|
||||
sudo shutdown
|
||||
|
||||
# Resume
|
||||
@@ -20,11 +21,17 @@ python3 detect.py
|
||||
|
||||
# Clone a branch
|
||||
sudo rm -rf yolov3 && git clone -b multi_gpu --depth 1 https://github.com/ultralytics/yolov3
|
||||
cp -r weights yolov3
|
||||
cd yolov3 && python3 train.py --batch-size 48 --epochs 1
|
||||
sudo shutdown
|
||||
|
||||
# Git pull branch
|
||||
git pull https://github.com/ultralytics/yolov3 multi_gpu
|
||||
|
||||
# Test
|
||||
sudo rm -rf yolov3 && git clone https://github.com/ultralytics/yolov3
|
||||
sudo rm -rf cocoapi && git clone https://github.com/cocodataset/cocoapi && cd cocoapi/PythonAPI && make && cd ../.. && cp -r cocoapi/PythonAPI/pycocotools yolov3
|
||||
cd yolov3 && python3 test.py --save-json --conf-thres 0.005
|
||||
cd yolov3 && python3 test.py --save-json --conf-thres 0.001 --img-size 416
|
||||
|
||||
# Test Darknet training
|
||||
python3 test.py --img_size 416 --weights ../darknet/backup/yolov3.backup
|
||||
@@ -33,7 +40,7 @@ python3 test.py --img_size 416 --weights ../darknet/backup/yolov3.backup
|
||||
wget https://storage.googleapis.com/ultralytics/yolov3.pt -O weights/latest.pt
|
||||
|
||||
# Copy latest.pt to bucket
|
||||
gsutil cp yolov3/weights/latest.pt gs://ultralytics
|
||||
gsutil cp yolov3/weights/latest1gpu.pt gs://ultralytics
|
||||
|
||||
# Copy latest.pt from bucket
|
||||
gsutil cp gs://ultralytics/latest.pt yolov3/weights/latest.pt
|
||||
|
||||
+14
-27
@@ -95,7 +95,7 @@ def weights_init_normal(m):
|
||||
|
||||
def xyxy2xywh(x):
|
||||
# Convert bounding box format from [x1, y1, x2, y2] to [x, y, w, h]
|
||||
y = torch.zeros_like(x) if x.dtype is torch.float32 else np.zeros_like(x)
|
||||
y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
|
||||
y[:, 0] = (x[:, 0] + x[:, 2]) / 2
|
||||
y[:, 1] = (x[:, 1] + x[:, 3]) / 2
|
||||
y[:, 2] = x[:, 2] - x[:, 0]
|
||||
@@ -105,7 +105,7 @@ def xyxy2xywh(x):
|
||||
|
||||
def xywh2xyxy(x):
|
||||
# Convert bounding box format from [x, y, w, h] to [x1, y1, x2, y2]
|
||||
y = torch.zeros_like(x) if x.dtype is torch.float32 else np.zeros_like(x)
|
||||
y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
|
||||
y[:, 0] = (x[:, 0] - x[:, 2] / 2)
|
||||
y[:, 1] = (x[:, 1] - x[:, 3] / 2)
|
||||
y[:, 2] = (x[:, 0] + x[:, 2] / 2)
|
||||
@@ -251,7 +251,7 @@ def wh_iou(box1, box2):
|
||||
def compute_loss(p, targets): # predictions, targets
|
||||
FT = torch.cuda.FloatTensor if p[0].is_cuda else torch.FloatTensor
|
||||
loss, lxy, lwh, lcls, lconf = FT([0]), FT([0]), FT([0]), FT([0]), FT([0])
|
||||
txy, twh, tcls, tconf, indices = targets
|
||||
txy, twh, tcls, indices = targets
|
||||
MSE = nn.MSELoss()
|
||||
CE = nn.CrossEntropyLoss()
|
||||
BCE = nn.BCEWithLogitsLoss()
|
||||
@@ -260,18 +260,21 @@ def compute_loss(p, targets): # predictions, targets
|
||||
# gp = [x.numel() for x in tconf] # grid points
|
||||
for i, pi0 in enumerate(p): # layer i predictions, i
|
||||
b, a, gj, gi = indices[i] # image, anchor, gridx, gridy
|
||||
tconf = torch.zeros_like(pi0[..., 0]) # conf
|
||||
|
||||
# Compute losses
|
||||
k = 1 # nT / bs
|
||||
if len(b) > 0:
|
||||
pi = pi0[b, a, gj, gi] # predictions closest to anchors
|
||||
tconf[b, a, gj, gi] = 1 # conf
|
||||
|
||||
lxy += k * MSE(torch.sigmoid(pi[..., 0:2]), txy[i]) # xy
|
||||
lwh += k * MSE(pi[..., 2:4], twh[i]) # wh
|
||||
lcls += (k / 4) * CE(pi[..., 5:], tcls[i])
|
||||
|
||||
# pos_weight = FT([gp[i] / min(gp) * 4.])
|
||||
# BCE = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
|
||||
lconf += (k * 64) * BCE(pi0[..., 4], tconf[i])
|
||||
lconf += (k * 64) * BCE(pi0[..., 4], tconf)
|
||||
loss = lxy + lwh + lconf + lcls
|
||||
|
||||
# Add to dictionary
|
||||
@@ -283,15 +286,13 @@ def compute_loss(p, targets): # predictions, targets
|
||||
return loss, d
|
||||
|
||||
|
||||
def build_targets(model, targets, pred):
|
||||
def build_targets(model, targets):
|
||||
# targets = [image, class, x, y, w, h]
|
||||
if isinstance(model, nn.parallel.DistributedDataParallel):
|
||||
model = model.module
|
||||
yolo_layers = get_yolo_layers(model)
|
||||
|
||||
# anchors = closest_anchor(model, targets) # [layer, anchor, i, j]
|
||||
txy, twh, tcls, tconf, indices = [], [], [], [], []
|
||||
for i, layer in enumerate(yolo_layers):
|
||||
txy, twh, tcls, indices = [], [], [], []
|
||||
for i, layer in enumerate(get_yolo_layers(model)):
|
||||
nG = model.module_list[layer][0].nG # grid size
|
||||
anchor_vec = model.module_list[layer][0].anchor_vec
|
||||
|
||||
@@ -324,12 +325,7 @@ def build_targets(model, targets, pred):
|
||||
# Class
|
||||
tcls.append(c)
|
||||
|
||||
# Conf
|
||||
tci = torch.zeros_like(pred[i][..., 0])
|
||||
tci[b, a, gj, gi] = 1 # conf
|
||||
tconf.append(tci)
|
||||
|
||||
return txy, twh, tcls, tconf, indices
|
||||
return txy, twh, tcls, indices
|
||||
|
||||
|
||||
def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
|
||||
@@ -439,15 +435,6 @@ def get_yolo_layers(model):
|
||||
return [i for i, x in enumerate(bool_vec) if x] # [82, 94, 106] for yolov3
|
||||
|
||||
|
||||
def return_torch_unique_index(u, uv):
    """Return, for each column of ``uv``, the index of the first equal column in ``u``.

    Args:
        u: 2-D tensor whose columns are searched.
        uv: 2-D tensor with the same number of rows as ``u``.

    Returns:
        1-D int64 tensor on ``u``'s device holding one column index per
        column of ``uv``.

    Raises:
        IndexError: if a column of ``uv`` does not occur in ``u``.
    """
    n = uv.shape[1]  # number of columns
    first_unique = torch.zeros(n, dtype=torch.long, device=u.device)
    for j in range(n):
        # Compare column j against every column of u; nonzero() lists the
        # matching column indices in ascending order, shape (k, 1).
        matches = (uv[:, j:j + 1] == u).all(0).nonzero()
        first_unique[j] = matches[0, 0]

    return first_unique
|
||||
|
||||
|
||||
def strip_optimizer_from_checkpoint(filename='weights/best.pt'):
|
||||
# Strip optimizer from *.pt files for lighter files (reduced by 2/3 size)
|
||||
a = torch.load(filename, map_location='cpu')
|
||||
@@ -480,10 +467,9 @@ def plot_results(start=0):
|
||||
# import os; os.system('wget https://storage.googleapis.com/ultralytics/yolov3/results_v3.txt')
|
||||
# from utils.utils import *; plot_results()
|
||||
|
||||
plt.figure(figsize=(14, 7))
|
||||
fig = plt.figure(figsize=(14, 7))
|
||||
s = ['X + Y', 'Width + Height', 'Confidence', 'Classification', 'Total Loss', 'Precision', 'Recall', 'mAP']
|
||||
files = sorted(glob.glob('results*.txt'))
|
||||
for f in files:
|
||||
for f in sorted(glob.glob('results*.txt')):
|
||||
results = np.loadtxt(f, usecols=[2, 3, 4, 5, 6, 9, 10, 11]).T # column 11 is mAP
|
||||
x = range(1, results.shape[1])
|
||||
for i in range(8):
|
||||
@@ -492,3 +478,4 @@ def plot_results(start=0):
|
||||
plt.title(s[i])
|
||||
if i == 0:
|
||||
plt.legend()
|
||||
fig.tight_layout()
|
||||
|
||||
Reference in New Issue
Block a user