CV

Bounding Box

"边框坐标编码表示"

Posted by Stephen on April 2, 2022

前言

了解视觉:物体识别的边框坐标的表示方法,目前业界有三种边框坐标编码。

边框是在图像上标记目标的矩形。边框的标注有多种格式。每种格式都使用其特定的边框坐标表示。常见的包括

  1. Pascal VOC
  2. COCO
  3. YOLO

环境

系统环境

Distributor ID:	Ubuntu
Description:	Ubuntu 18.04.4 LTS
Release:	18.04
Codename:	bionic
Linux version :       5.3.0-46-generic ( buildd@lcy01-amd64-013 ) 
Gcc version:         7.5.0  ( Ubuntu 7.5.0-3ubuntu1~18.04 )

软件信息

version : 	
     None

正文

pascal_voc

边框坐标编码是 [x_min, y_min, x_max, y_max]:x_min 和 y_min 表示边框左上角坐标,x_max 和 y_max 表示边框右下角坐标。上图例子就是 [98, 345, 420, 462]。

coco

边框坐标编码是[x_min, y_min, width, height] 表示左上角的坐标以及边框的宽度和高度。 上图例子就是[98, 345, 322, 117]

yolo

边框坐标编码是 [x_center, y_center, width, height],这 4 个值都是经过数据规范化(normalized)的。x_center, y_center 表示边框的中心位置,width, height 分别表示边框的宽度和高度。上图例子中图像的宽度是 640,高度是 480;边框的宽度是 322,高度是 117。不规范化是 [(98 + (322 / 2)), (345 + (117 / 2)), 322, 117] = [259, 403.5, 322, 117]。规范化方法是把 x 方向的值除以图像宽度、y 方向的值除以图像高度,即 [259 / 640, 403.5 / 480, 322 / 640, 117 / 480],最终结果是 [0.4046875, 0.840625, 0.503125, 0.24375]。

实践

YOLOv5 用的就是 yolo 的边框表示方法。可参考:coco 格式的坐标转 yolo 格式的工具、工具使用方法。在各种格式转到 YOLOv5 格式之后,为防止转换错误,最后检查一下,把标注结果可视化出来。

import numpy as np
import cv2
import torch

# Inputs for the single-image check: the image to display and the
# YOLO-format label file that annotates it.
image_path = './1.jpg'
label_path = './1.txt'

# Coordinate conversion; the labels are stored in YOLOv5 format.
def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
    """Convert normalized [x_center, y_center, width, height] boxes to pixel corners.

    Args:
        x: torch.Tensor or numpy array whose last axis holds the four values
            [x_center, y_center, width, height], normalized by the image size.
            Both a batch of shape (n, 4) and a single box of shape (4,) work.
        w: Image width in pixels used to scale the x values.
        h: Image height in pixels used to scale the y values.
        padw: Offset added to both x coordinates.
        padh: Offset added to both y coordinates.

    Returns:
        A new tensor/array of the same shape as ``x`` holding
        [x1, y1, x2, y2], where (x1, y1) is the top-left corner and
        (x2, y2) is the bottom-right corner. ``x`` itself is not modified.
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    # Index the last axis with `...` so that a lone box of shape (4,) is
    # handled the same way as an (n, 4) batch.
    y[..., 0] = w * (x[..., 0] - x[..., 2] / 2) + padw  # top left x
    y[..., 1] = h * (x[..., 1] - x[..., 3] / 2) + padh  # top left y
    y[..., 2] = w * (x[..., 0] + x[..., 2] / 2) + padw  # bottom right x
    y[..., 3] = h * (x[..., 1] + x[..., 3] / 2) + padh  # bottom right y
    return y

# Read the labels: one object per line, "class x_center y_center width height".
with open(label_path, 'r') as f:
    lb = np.array([line.split() for line in f.read().strip().splitlines()], dtype=np.float32)  # labels
if lb.size == 0:
    # An empty label file produces a 1-D array; keep it 2-D so the column
    # slicing below stays valid and the loop simply draws nothing.
    lb = lb.reshape(0, 5)
print(lb)

# Read the image file. cv2.imread signals failure by returning None.
img = cv2.imread(str(image_path))
if img is None:
    raise OSError("cannot read image: {}".format(image_path))
h, w = img.shape[:2]
lb[:, 1:] = xywhn2xyxy(lb[:, 1:], w, h, 0, 0)  # de-normalize to pixel corners
print(lb)

# Draw each box together with its class id.
for row in lb:
    class_label = int(row[0])  # class
    # OpenCV drawing functions require integer pixel coordinates.
    x1, y1, x2, y2 = np.rint(row[1:5]).astype(int).tolist()
    cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0))
    cv2.putText(img, str(class_label), (x1, y1 - 2), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                fontScale=1, color=(0, 0, 255), thickness=2)
cv2.imshow('show', img)
cv2.waitKey(0)  # wait for a key press before closing
cv2.destroyAllWindows()

绘制一个文件夹中的所有图片,结果写在check目录

import numpy as np
import cv2
import torch

# Dataset locations for the batch check: images are read from the image
# directory, their labels from the label directory, and the annotated
# copies are written to the check directory.
source_directory_img_path = "../images/train"
source_directory_label_path = "../labels/train"
target_directory_path = "../check"

# Coordinate conversion; the labels are stored in YOLOv5 format.
def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
    """Convert normalized [x_center, y_center, width, height] boxes to pixel corners.

    Args:
        x: torch.Tensor or numpy array whose last axis holds the four values
            [x_center, y_center, width, height], normalized by the image size.
            Both a batch of shape (n, 4) and a single box of shape (4,) work.
        w: Image width in pixels used to scale the x values.
        h: Image height in pixels used to scale the y values.
        padw: Offset added to both x coordinates.
        padh: Offset added to both y coordinates.

    Returns:
        A new tensor/array of the same shape as ``x`` holding
        [x1, y1, x2, y2], where (x1, y1) is the top-left corner and
        (x2, y2) is the bottom-right corner. ``x`` itself is not modified.
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    # Index the last axis with `...` so that a lone box of shape (4,) is
    # handled the same way as an (n, 4) batch.
    y[..., 0] = w * (x[..., 0] - x[..., 2] / 2) + padw  # top left x
    y[..., 1] = h * (x[..., 1] - x[..., 3] / 2) + padh  # top left y
    y[..., 2] = w * (x[..., 0] + x[..., 2] / 2) + padw  # bottom right x
    y[..., 3] = h * (x[..., 1] + x[..., 3] / 2) + padh  # bottom right y
    return y

import os
def draw_label(image_path, label_path):
    """Draw the YOLO-format boxes from a label file onto an image.

    Args:
        image_path: Path of the image file to annotate.
        label_path: Path of the label file; each line is
            "class x_center y_center width height" with the four box
            values normalized by the image size.

    Returns:
        The image array as loaded by ``cv2.imread`` with a green rectangle
        and the class id drawn for every label row.

    Raises:
        OSError: If the label file cannot be opened or the image cannot
            be read.
    """
    with open(label_path, 'r') as f:
        lb = np.array([line.split() for line in f.read().strip().splitlines()], dtype=np.float32)  # labels
    if lb.size == 0:
        # An empty label file produces a 1-D array; keep it 2-D so the
        # column slicing below stays valid and nothing is drawn.
        lb = lb.reshape(0, 5)

    # Read the image file. cv2.imread signals failure by returning None.
    img = cv2.imread(str(image_path))
    if img is None:
        raise OSError("cannot read image: {}".format(image_path))
    h, w = img.shape[:2]
    lb[:, 1:] = xywhn2xyxy(lb[:, 1:], w, h, 0, 0)  # de-normalize to pixel corners

    # Draw each box together with its class id.
    for row in lb:
        class_label = int(row[0])  # class
        # OpenCV drawing functions require integer pixel coordinates.
        x1, y1, x2, y2 = np.rint(row[1:5]).astype(int).tolist()
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0))
        cv2.putText(img, str(class_label), (x1, y1 - 2), fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1,
                    color=(0, 0, 255), thickness=2)
    return img

if __name__ == '__main__':
    # Walk the image tree and write an annotated copy of every labelled image
    # into the check directory, mirroring any sub-directory layout.
    for root, dirs, files in os.walk(source_directory_img_path):
        # os.walk descends into sub-directories, so build paths from `root`
        # and look for labels / write results under the same relative path.
        rel_dir = os.path.relpath(root, source_directory_img_path)
        label_dir = os.path.normpath(os.path.join(source_directory_label_path, rel_dir))
        out_dir = os.path.normpath(os.path.join(target_directory_path, rel_dir))
        # cv2.imwrite does not create missing directories.
        os.makedirs(out_dir, exist_ok=True)
        for f in files:
            # splitext keeps dots that are part of the name ("a.b.jpg" -> "a.b").
            file_name = os.path.splitext(f)[0] + ".txt"
            image_path = os.path.join(root, f)
            label_path = os.path.join(label_dir, file_name)
            if not os.path.isfile(label_path):
                # Nothing to draw for this file; keep checking the rest.
                print("skip (no label file): " + image_path)
                continue
            target = os.path.join(out_dir, f)
            img = draw_label(image_path, label_path)
            print(target)
            # imwrite reports failure through its return value, not an exception.
            if not cv2.imwrite(target, img):
                print("failed to write: " + target)

参考: