diff --git a/yolov3/detect_on_bag.py b/yolov3/detect_on_bag.py
new file mode 100644
index 00000000..0f079e84
--- /dev/null
+++ b/yolov3/detect_on_bag.py
@@ -0,0 +1,193 @@
+# ROS 2 node that subscribes to an input image topic and runs YOLOv3 detection.
+#
+# Interface (modeled on darknet_ros; only detection_image is published so far):
+#
+# object_detector ([std_msgs::Int8])
+#   The number of detected objects.
+#
+# bounding_boxes ([darknet_ros_msgs::BoundingBoxes])
+#   An array of bounding boxes giving the position and size of each box in
+#   pixel coordinates.
+#
+# detection_image ([sensor_msgs::Image])
+#   The input image annotated with the detected bounding boxes.
+
+import os
+import sys
+from pathlib import Path
+
+import cv2
+import numpy as np
+import torch
+
+import rclpy
+from rclpy.node import Node
+from sensor_msgs.msg import Image
+from cv_bridge import CvBridge
+
+FILE = Path(__file__).resolve()
+ROOT = FILE.parents[0]  # root directory
+if str(ROOT) not in sys.path:
+    sys.path.append(str(ROOT))  # add ROOT to PATH
+ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative
+
+from models.common import DetectMultiBackend
+from utils.general import (LOGGER, check_img_size, increment_path,
+                           non_max_suppression, scale_coords)
+from utils.plots import Annotator, colors
+from utils.torch_utils import select_device, time_sync
+from utils.augmentations import letterbox
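+
+# For reference, a minimal sketch (not called by the node) of the per-frame
+# preprocessing that run() performs below: letterbox pad-resize, channel
+# reorder, and normalisation. `size` and `stride` are placeholder values.
+def preprocess_example(img_bgr, size=(640, 640), stride=32):
+    padded = letterbox(img_bgr, size, stride=stride, auto=True)[0]  # pad-resize, aspect ratio kept
+    chw = padded.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
+    chw = np.ascontiguousarray(chw)
+    tensor = torch.from_numpy(chw).float() / 255  # uint8 0-255 to float 0.0-1.0
+    return tensor[None]  # add batch dim: 1x3xHxW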
+
+
+@torch.no_grad()
+def run(weights=ROOT / 'yolov3.pt',  # model.pt path(s)
+        source=ROOT / 'data/images',  # input image as a BGR numpy array
+        imgsz=(1280, 1280),  # inference size (height, width) in pixels
+        conf_thres=0.25,  # confidence threshold
+        iou_thres=0.45,  # NMS IoU threshold
+        max_det=1000,  # maximum detections per image
+        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
+        view_img=False,  # show results
+        save_txt=False,  # save results to *.txt
+        save_conf=False,  # save confidences in --save-txt labels
+        save_crop=False,  # save cropped prediction boxes
+        nosave=False,  # do not save images/videos
+        classes=None,  # filter by class: --class 0, or --class 0 2 3
+        agnostic_nms=False,  # class-agnostic NMS
+        augment=False,  # augmented inference
+        visualize=False,  # visualize features
+        update=False,  # update all models
+        project=ROOT / 'runs/detect',  # save results to project/name
+        name='exp',  # save results to project/name
+        exist_ok=False,  # existing project/name ok, do not increment
+        line_thickness=3,  # bounding box thickness (pixels)
+        hide_labels=False,  # hide labels
+        hide_conf=False,  # hide confidences
+        half=False,  # use FP16 half-precision inference
+        dnn=False,  # use OpenCV DNN for ONNX inference
+        ):
+
+    # Directories
+    save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
+    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir
+
+    # Load model
+    device = select_device(device)
+    model = DetectMultiBackend(weights, device=device, dnn=dnn)
+    stride, names, pt = model.stride, model.names, model.pt
+    imgsz = check_img_size(imgsz, s=stride)  # check image size
+
+    # Half
+    half &= pt and device.type != 'cpu'  # half precision only supported by PyTorch on CUDA
+    if pt:
+        model.model.half() if half else model.model.float()
+
+    # Padded resize
+    dataset_img = letterbox(source, imgsz, stride=stride, auto=pt)[0]
+
+    # Convert
+    dataset_img = dataset_img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
+    dataset_img = np.ascontiguousarray(dataset_img)
+
+    # Run inference
+    if pt and device.type != 'cpu':
+        model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.model.parameters())))  # warmup
+    dt, seen = [0.0, 0.0, 0.0], 0
+    t1 = time_sync()
+    im = torch.from_numpy(dataset_img).to(device)
+    im = im.half() if half else im.float()  # uint8 to fp16/32
+    im /= 255  # 0 - 255 to 0.0 - 1.0
+    if len(im.shape) == 3:
+        im = im[None]  # expand for batch dim
+    t2 = time_sync()
+    dt[0] += t2 - t1  # pre-processing time
+    pred = model(im, augment=augment, visualize=visualize)
+    t3 = time_sync()
+    dt[1] += t3 - t2  # inference time
+    pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
+    dt[2] += time_sync() - t3  # NMS time
+
+    # Iterate through detections (batch size is 1, so pred holds one tensor)
+    for i, det in enumerate(pred):
+        seen += 1
+        im0 = source.copy()
+        annotator = Annotator(im0, line_width=line_thickness, example=str(names))
+        s = ''  # per-class detection summary, e.g. "2 persons, 1 car, "
+        if len(det):
+            # Rescale boxes from img_size to im0 size
+            det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()
+
+            # Print results
+            for c in det[:, -1].unique():
+                n = (det[:, -1] == c).sum()  # detections per class
+                s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string
+
+            # Write results
+            for *xyxy, conf, cls in reversed(det):
+                c = int(cls)  # integer class
+                label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
+                annotator.box_label(xyxy, label, color=colors(c, True))
+
+        # Print time (inference-only)
+        LOGGER.info(f'{s}Done. ({t3 - t2:.3f}s)')
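+
+# A minimal standalone sketch (not used by the ROS node below): run the
+# detector on a single image file. The weights and image paths here are
+# placeholders.
+def detect_image_file(weights='yolov3.pt', image_path='data/images/bus.jpg'):
+    img = cv2.imread(image_path)  # BGR HWC uint8, as run() expects
+    if img is None:
+        raise FileNotFoundError(image_path)
+    annotated = run(weights, img)  # annotated image in the input's channel order
+    cv2.imwrite('predictions.jpg', annotated)
+    return annotated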
+
+        # Return the annotated image instead of writing it to disk
+        im0 = annotator.result()
+        # cv2.imwrite(save_path, im0)
+        return im0
+
+
+class DetectOnBag(Node):
+
+    def __init__(self):
+        super().__init__('detect_on_bag')
+        self.subscription = self.create_subscription(
+            Image,
+            '/camera/color/image_raw',
+            self.image_callback,
+            10)
+        self.subscription  # prevent unused variable warning
+        self.detected_img_pub = self.create_publisher(Image, '/detection_image', 10)
+        self.bridge = CvBridge()  # one bridge reused for all conversions
+
+    def image_callback(self, msg):
+        self.get_logger().info(f'Image received, stamp: {msg.header.stamp}')
+
+        cv_image = self.bridge.imgmsg_to_cv2(msg, desired_encoding='passthrough')
+        # NOTE: run() reloads the model on every frame; caching the loaded
+        # model would avoid that cost.
+        detected_img = run('runs/train/exp14/weights/best.pt', cv_image)
+        self.detected_img_pub.publish(self.numpy_array_to_image_msg(detected_img))
+
+    def numpy_array_to_image_msg(self, numpy_array):
+        # Convert the annotated RGB array to BGR for the bgr8 encoding
+        cv_image = cv2.cvtColor(numpy_array, cv2.COLOR_RGB2BGR)
+
+        # Convert the cv image to a ROS message
+        ros_image = self.bridge.cv2_to_imgmsg(cv_image, encoding='bgr8')
+
+        return ros_image
+
+
+def main(args=None):
+    rclpy.init(args=args)
+
+    detect_on_bag = DetectOnBag()
+
+    rclpy.spin(detect_on_bag)
+
+    # Destroy the node explicitly
+    # (optional - otherwise it will be done automatically
+    # when the garbage collector destroys the node object)
+    detect_on_bag.destroy_node()
+    rclpy.shutdown()
+
+
+if __name__ == '__main__':
+    main()
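+
+# A hypothetical launch sequence, assuming this file is registered as a
+# `detect_on_bag` console entry point of a `yolov3` ROS 2 package:
+#   ros2 run yolov3 detect_on_bag
+# then replay a bag that publishes /camera/color/image_raw:
+#   ros2 bag play <path_to_bag>
+# The annotated stream can be viewed with:
+#   ros2 run rqt_image_view rqt_image_view /detection_image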