#目标检测理论:边界框、锚框、IOU计算与NMS算法详解
#引言
目标检测是计算机视觉中的核心任务之一,旨在识别图像中感兴趣对象的类别并精确定位其位置。与图像分类任务不同,目标检测不仅要回答"图像中有什么",还要回答"对象在哪里"的问题。本文将深入探讨目标检测的核心理论概念,包括边界框表示、IOU计算和NMS算法等关键技术。
📂 所属阶段:第二阶段 — 深度学习视觉基础(CNN 篇)
🔗 相关章节:迁移学习 (Transfer Learning) · YOLO 家族实战
#1. 目标检测基础概念
#1.1 目标检测任务定义
目标检测是计算机视觉中的重要任务,它结合了分类和定位两个子任务。
"""
目标检测任务的核心要素:
1. 分类 (Classification):
- 识别图像中对象的类别
- 如:人、车、动物、建筑物等
2. 定位 (Localization):
- 确定对象在图像中的精确位置
- 通常使用边界框表示
3. 多目标检测:
- 同时检测图像中的多个对象
- 每个对象都有对应的类别和位置信息
数学表示:
输入:图像 I ∈ R^(H×W×C)
输出:{(class_i, bbox_i, confidence_i)}_{i=1}^{N}
其中 N 是检测到的对象数量
"""
def target_detection_concept():
    """Print a four-line summary of the object-detection task.

    The summary lists the task definition, its input, its output and the
    main challenge, one bullet per line, after a fixed header line.
    """
    # Ordered (field, description) pairs; printed in this order.
    summary = (
        ("Task", "Object Detection = Classification + Localization"),
        ("Input", "Single Image"),
        ("Output", "Multiple bounding boxes with class labels"),
        ("Challenge", "Handle multiple objects of various sizes and positions"),
    )
    print("目标检测核心概念:")
    for field, description in summary:
        print("• " + field + ": " + description)
target_detection_concept()
#1.2 目标检测的应用场景
def detection_applications():
    """Print typical application areas of object detection.

    Emits a header line followed by one bullet per application domain,
    each pairing the domain name with a short description.
    """
    domains = [
        "Autonomous Driving",
        "Surveillance",
        "Industrial Inspection",
        "Medical Imaging",
        "Retail",
        "Agriculture",
        "Security",
    ]
    descriptions = [
        "检测车辆、行人、交通信号灯、道路标志",
        "检测异常行为、入侵者、可疑物品",
        "检测产品缺陷、尺寸偏差、表面瑕疵",
        "检测病变区域、器官、异常组织",
        "检测商品、顾客行为、货架库存",
        "检测作物病虫害、杂草、果实成熟度",
        "人脸识别、危险物品检测、人群密度监测",
    ]
    print("目标检测主要应用场景:")
    for domain, description in zip(domains, descriptions):
        print(f"• {domain}: {description}")
detection_applications()
#1.3 目标检测算法发展史
def detection_timeline():
    """Print the milestones of object-detection algorithms in chronological order.

    The milestones are kept as an ordered sequence of ``(year, description)``
    pairs rather than a dict keyed by year: several milestones share a year
    (2015, 2016, 2020), and a dict literal with repeated keys silently keeps
    only the last entry for each of them.
    """
    timeline = (
        ("2014", "R-CNN: Region-based CNN, 首次将CNN用于目标检测"),
        ("2015", "Fast R-CNN: 改进R-CNN速度"),
        ("2015", "Faster R-CNN: 引入RPN,端到端训练"),
        ("2016", "YOLO: 单阶段检测器,速度更快"),
        ("2016", "SSD: Single Shot MultiBox Detector"),
        ("2018", "YOLOv3: 多尺度预测,精度提升"),
        ("2020", "YOLOv4: 集成多项改进技术"),
        ("2020", "DETR: 基于Transformer的目标检测"),
        ("2021", "YOLOv5: 简化实现,易于使用"),
        ("2022", "YOLOv6, YOLOv7: 进一步优化"),
        ("2023", "YOLOv8: 当前最先进的一阶段检测器"),
    )
    print("目标检测算法发展时间线:")
    for year, alg in timeline:
        print(f"• {year}: {alg}")
detection_timeline()
#2. 边界框表示与操作
#2.1 边界框表示方法
边界框是目标检测中表示对象位置的基本单位,有多种表示方法。
"""
边界框的常见表示方法:
1. Corner Coordinates (角点坐标):
- 格式:(x_min, y_min, x_max, y_max)
- 含义:左上角和右下角坐标
2. Center Coordinates (中心坐标):
- 格式:(center_x, center_y, width, height)
- 含义:中心点坐标和宽高
3. Normalized Coordinates (归一化坐标):
- 坐标值在[0,1]范围内
- 便于处理不同尺寸的图像
"""
class BoundingBox:
    """Axis-aligned bounding box stored as corner coordinates.

    The instance keeps the top-left ``(x1, y1)`` and bottom-right
    ``(x2, y2)`` corners together with an optional label and confidence.
    Width, height, area and centre are derived from the corners on access.
    """

    def __init__(self, x1, y1, x2, y2, label=None, confidence=None):
        """Create a box from its corner coordinates.

        Args:
            x1, y1: Top-left corner.
            x2, y2: Bottom-right corner.
            label: Optional class label.
            confidence: Optional confidence score.
        """
        self.x1, self.y1 = x1, y1
        self.x2, self.y2 = x2, y2
        self.label = label
        self.confidence = confidence

    @property
    def width(self):
        """Horizontal extent, ``x2 - x1``."""
        return self.x2 - self.x1

    @property
    def height(self):
        """Vertical extent, ``y2 - y1``."""
        return self.y2 - self.y1

    @property
    def area(self):
        """Product of width and height."""
        return self.width * self.height

    @property
    def center_x(self):
        """Midpoint of ``x1`` and ``x2``."""
        return (self.x1 + self.x2) / 2

    @property
    def center_y(self):
        """Midpoint of ``y1`` and ``y2``."""
        return (self.y1 + self.y2) / 2

    def to_center_format(self):
        """Return the box as ``(center_x, center_y, width, height)``."""
        return self.center_x, self.center_y, self.width, self.height

    def to_corner_format(self):
        """Return the box as ``(x1, y1, x2, y2)``."""
        return self.x1, self.y1, self.x2, self.y2

    def scale(self, scale_factor):
        """Return a new box scaled about this box's centre.

        Width and height are multiplied by ``scale_factor``; the centre,
        label and confidence are carried over unchanged.
        """
        cx, cy = self.center_x, self.center_y
        half_w = self.width * scale_factor / 2
        half_h = self.height * scale_factor / 2
        return BoundingBox(
            cx - half_w,
            cy - half_h,
            cx + half_w,
            cy + half_h,
            self.label,
            self.confidence,
        )

    def __repr__(self):
        return "BBox({:.2f}, {:.2f}, {:.2f}, {:.2f})".format(
            self.x1, self.y1, self.x2, self.y2
        )
def demonstrate_bbox_formats():
    """Print one sample box in each of the representations BoundingBox offers."""
    sample = BoundingBox(10, 10, 110, 60, label="person", confidence=0.95)
    corners = sample.to_corner_format()
    centered = sample.to_center_format()

    print("边界框表示方法演示:")
    print(f"角点坐标: {corners}")
    print(f"中心坐标: {centered}")
    print(f"宽度: {sample.width}, 高度: {sample.height}")
    print(f"面积: {sample.area}")
    print(f"中心点: ({sample.center_x}, {sample.center_y})")
demonstrate_bbox_formats()
#2.2 边界框操作函数
def convert_bbox_formats(x1, y1, x2, y2):
    """Convert corner coordinates to centre/size form.

    Args:
        x1, y1: Top-left corner.
        x2, y2: Bottom-right corner.

    Returns:
        Tuple ``(center_x, center_y, width, height)``.
    """
    center = ((x1 + x2) / 2, (y1 + y2) / 2)
    size = (x2 - x1, y2 - y1)
    return (*center, *size)
def convert_from_center(center_x, center_y, width, height):
    """Convert centre/size form to corner coordinates.

    Args:
        center_x, center_y: Centre point.
        width, height: Box extents.

    Returns:
        Tuple ``(x1, y1, x2, y2)``.
    """
    half_w = width / 2
    half_h = height / 2
    return (
        center_x - half_w,
        center_y - half_h,
        center_x + half_w,
        center_y + half_h,
    )
def clip_bbox(bbox, img_width, img_height):
    """Clamp a corner-format box to the image rectangle.

    Args:
        bbox: ``(x1, y1, x2, y2)``.
        img_width, img_height: Image size; x values are clamped to
            ``[0, img_width]`` and y values to ``[0, img_height]``.

    Returns:
        The clamped ``(x1, y1, x2, y2)`` tuple.
    """

    def _clamp(value, upper):
        # Cap at the upper bound first, then raise to zero.
        return max(0, min(value, upper))

    left, top, right, bottom = bbox
    return (
        _clamp(left, img_width),
        _clamp(top, img_height),
        _clamp(right, img_width),
        _clamp(bottom, img_height),
    )
def bbox_operations_demo():
    """Print a walkthrough of format conversion and clipping on sample boxes."""
    print("边界框操作演示:")

    # Round trip: corners -> centre/size -> corners.
    x1, y1, x2, y2 = 10, 10, 110, 60
    print(f"原始角点坐标: ({x1}, {y1}, {x2}, {y2})")

    cx, cy, w, h = convert_bbox_formats(x1, y1, x2, y2)
    print(f"中心坐标格式: ({cx}, {cy}, {w}, {h})")

    restored = convert_from_center(cx, cy, w, h)
    x1_back, y1_back, x2_back, y2_back = restored
    print(f"转换回角点坐标: ({x1_back}, {y1_back}, {x2_back}, {y2_back})")

    # Clip a box that extends past a 224x224 image.
    oversized_bbox = (-10, -10, 500, 500)
    clipped = clip_bbox(oversized_bbox, 224, 224)
    print(f"裁剪前: {oversized_bbox}")
    print(f"裁剪后: {clipped}")
bbox_operations_demo()
#3. IOU(交并比)计算
#3.1 IOU基本概念
IOU(Intersection over Union)是目标检测中最重要的评价指标之一。
"""
IOU (Intersection over Union) - 交并比
定义:
IOU = Area of Intersection / Area of Union
用途:
1. 评估预测框与真实框的匹配程度
2. NMS算法中的关键计算
3. 目标检测性能评价指标
阈值判断:
- IOU > 0.5: 良好匹配
- IOU > 0.7: 优秀匹配
- IOU < 0.3: 匹配较差
"""
def calculate_iou(box1, box2):
    """Compute the intersection-over-union of two corner-format boxes.

    Args:
        box1, box2: Boxes indexed as ``(x1, y1, x2, y2)``.

    Returns:
        Intersection area divided by union area; ``0.0`` when the boxes do
        not overlap or only touch along an edge.
    """

    def _area(box):
        return (box[2] - box[0]) * (box[3] - box[1])

    # Corners of the overlap rectangle.
    left, top = max(box1[0], box2[0]), max(box1[1], box2[1])
    right, bottom = min(box1[2], box2[2]), min(box1[3], box2[3])

    if right <= left or bottom <= top:
        return 0.0

    overlap = (right - left) * (bottom - top)
    union = _area(box1) + _area(box2) - overlap
    return overlap / union
def iou_visualization():
    """Print the IoU computation steps followed by a worked example."""
    steps = (
        "IOU计算步骤:",
        "1. 找到两个框的交集区域",
        "2. 计算交集面积",
        "3. 计算各自面积",
        "4. 计算并集面积 = 面积1 + 面积2 - 交集面积",
        "5. IOU = 交集面积 / 并集面积",
    )
    for line in steps:
        print(line)

    # Two 50x50 boxes that share a 20x20 corner region.
    box1 = (10, 10, 60, 60)
    box2 = (40, 40, 90, 90)
    iou = calculate_iou(box1, box2)

    print(f"\n示例计算:")
    print(f"Box1: {box1}")
    print(f"Box2: {box2}")
    print(f"IOU: {iou:.3f}")
iou_visualization()
#3.2 IOU的高级应用
def calculate_batch_iou(boxes1, boxes2):
    """Compute the pairwise IoU between two collections of boxes.

    Args:
        boxes1: First collection of ``(x1, y1, x2, y2)`` boxes.
        boxes2: Second collection of ``(x1, y1, x2, y2)`` boxes.

    Returns:
        NumPy array of shape ``(len(boxes1), len(boxes2))`` whose entry
        ``[i, j]`` is the IoU of ``boxes1[i]`` and ``boxes2[j]``.
    """
    import numpy as np

    pairwise = np.zeros((len(boxes1), len(boxes2)))
    for row, first in enumerate(boxes1):
        # Fill one row at a time: the IoU of `first` against every box in boxes2.
        pairwise[row] = [calculate_iou(first, second) for second in boxes2]
    return pairwise
def generalized_iou(bbox1, bbox2):
    """Compute the Generalized IoU (GIoU) of two corner-format boxes.

    ``GIoU = IoU - (C - U) / C`` where ``U`` is the area of the union of the
    two boxes and ``C`` is the area of the smallest axis-aligned box that
    encloses both.

    The intersection area is computed directly from the coordinates. It
    cannot be recovered as ``IoU * C``: by definition the intersection is
    ``IoU * U``, and using ``C`` instead gives a wrong union (and therefore a
    wrong GIoU) whenever the boxes overlap only partially.

    Args:
        bbox1, bbox2: Boxes indexed as ``(x1, y1, x2, y2)``.

    Returns:
        The GIoU value. When the enclosing box has zero area, the plain IoU
        (``0.0`` in that case) is returned instead.
    """
    area1 = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])
    area2 = (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1])

    # Overlap rectangle; a non-positive side means there is no intersection.
    inter_w = min(bbox1[2], bbox2[2]) - max(bbox1[0], bbox2[0])
    inter_h = min(bbox1[3], bbox2[3]) - max(bbox1[1], bbox2[1])
    inter_area = inter_w * inter_h if inter_w > 0 and inter_h > 0 else 0.0

    union_area = area1 + area2 - inter_area
    iou = inter_area / union_area if union_area > 0 else 0.0

    # Smallest box enclosing both inputs.
    enclose_w = max(bbox1[2], bbox2[2]) - min(bbox1[0], bbox2[0])
    enclose_h = max(bbox1[3], bbox2[3]) - min(bbox1[1], bbox2[1])
    area_c = enclose_w * enclose_h
    if area_c == 0:
        return iou

    return iou - (area_c - union_area) / area_c
def iou_advanced_usage():
    """Print a list of IoU extensions and a sample pairwise IoU matrix."""
    for line in (
        "IOU高级应用:",
        "• 批量IOU计算: 用于评估多个预测框",
        "• GIOU: 解决非重叠框的梯度问题",
        "• DIoU/CIoU: 考虑中心点距离和宽高比",
    ):
        print(line)

    # Two small batches of 50x50 boxes for the pairwise example.
    first_batch = [(10, 10, 60, 60), (30, 30, 80, 80)]
    second_batch = [(40, 40, 90, 90), (20, 20, 70, 70)]
    batch_iou = calculate_batch_iou(first_batch, second_batch)
    print(f"\n批量IOU矩阵:\n{batch_iou}")
iou_advanced_usage()
#4. NMS(非极大值抑制)
#4.1 NMS基本原理
NMS是目标检测中去除重复检测框的关键算法。
"""
NMS (Non-Maximum Suppression) - 非极大值抑制
目的:去除重复的检测框,保留最优的检测结果
基本流程:
1. 按置信度分数降序排列所有检测框
2. 选择置信度最高的框作为当前框
3. 计算当前框与其他框的IOU
4. 抑制(删除)IOU大于阈值的框
5. 重复步骤2-4直到处理完所有框
参数:
- iou_threshold: IOU阈值,通常为0.5
"""
def nms_naive(boxes, scores, iou_threshold=0.5):
    """Greedy non-maximum suppression.

    Repeatedly keeps the highest-scoring remaining box and discards every
    other remaining box whose IoU with it is not below ``iou_threshold``.

    Args:
        boxes: Sequence of ``(x1, y1, x2, y2)`` boxes.
        scores: Sequence of confidence scores, one per box.
        iou_threshold: Boxes with IoU >= this value against a kept box are
            suppressed.

    Returns:
        Indices of the kept boxes, in descending score order.
    """
    if len(boxes) == 0:
        return []

    # Candidate indices, best score first.
    pending = sorted(range(len(scores)), key=scores.__getitem__, reverse=True)
    kept = []
    while pending:
        best, rest = pending[0], pending[1:]
        kept.append(best)
        best_box = boxes[best]
        pending = [
            idx for idx in rest
            if calculate_iou(best_box, boxes[idx]) < iou_threshold
        ]
    return kept
def soft_nms(boxes, scores, iou_threshold=0.5, sigma=0.5, score_threshold=0.001):
    """Soft-NMS: decay the scores of overlapping boxes instead of deleting them.

    On each round the highest-scoring remaining box is selected. Every other
    remaining box whose IoU with it exceeds ``iou_threshold`` has its score
    multiplied by the Gaussian weight ``exp(-iou^2 / (2 * sigma^2))``. The
    loop stops once the best remaining score falls below ``score_threshold``.

    Args:
        boxes: Sequence of ``(x1, y1, x2, y2)`` boxes.
        scores: Sequence of confidence scores, one per box. Not modified.
        iou_threshold: Only boxes overlapping the selected box by more than
            this IoU are penalised.
        sigma: Width of the Gaussian penalty.
        score_threshold: Boxes whose (decayed) score is below this value are
            dropped.

    Returns:
        Indices of the kept boxes, in selection order.
    """
    # Local import so the function does not depend on where the module-level
    # `import math` sits relative to this definition.
    import math

    # list() instead of .copy() so tuples and other sequences are accepted.
    decayed = list(scores)
    # Indices not yet selected. Tracking them explicitly (rather than zeroing
    # the selected score) guarantees a box is never selected twice, even when
    # score_threshold <= 0.
    candidates = list(range(len(decayed)))
    keep = []

    while candidates:
        # max() returns the first maximal element, so ties resolve to the
        # lowest index.
        best = max(candidates, key=decayed.__getitem__)
        if decayed[best] < score_threshold:
            break
        keep.append(best)
        candidates.remove(best)

        best_box = boxes[best]
        for j in candidates:
            iou = calculate_iou(best_box, boxes[j])
            if iou > iou_threshold:
                decayed[j] *= math.exp(-(iou * iou) / (2 * sigma * sigma))

    return keep
import math
def nms_demonstration():
    """Print four sample detections and the ones nms_naive keeps at IoU 0.5."""
    print("NMS算法演示:")

    boxes = [
        (10, 10, 60, 60),      # box 1
        (15, 15, 65, 65),      # heavily overlaps box 1
        (50, 50, 100, 100),    # slightly overlaps boxes 1 and 2
        (120, 120, 170, 170),  # isolated box
    ]
    scores = [0.9, 0.8, 0.7, 0.95]

    print("原始检测框和分数:")
    for number, (box, score) in enumerate(zip(boxes, scores), start=1):
        print(f"框{number}: {box}, 分数: {score}")

    keep_indices = nms_naive(boxes, scores, iou_threshold=0.5)

    print(f"\nNMS后保留的框 (IOU阈值=0.5):")
    for kept in keep_indices:
        print(f"框{kept + 1}: {boxes[kept]}, 分数: {scores[kept]}")
nms_demonstration()
#4.2 NMS的优化变体
def diou_nms(boxes, scores, iou_threshold=0.5):
    """Greedy NMS that uses DIoU instead of IoU as the overlap measure.

    ``DIoU = IoU - d^2 / c^2`` where ``d`` is the distance between the two
    box centres and ``c`` is the diagonal of the smallest box enclosing both.

    Args:
        boxes: Sequence of ``(x1, y1, x2, y2)`` boxes.
        scores: Sequence of confidence scores, one per box.
        iou_threshold: Boxes whose DIoU against a kept box is not below this
            value are suppressed.

    Returns:
        Indices of the kept boxes, in descending score order.
    """

    def _diou(a, b):
        """Return the DIoU of boxes ``a`` and ``b``."""
        overlap = calculate_iou(a, b)

        # Squared distance between the two centres.
        dx = (a[0] + a[2]) / 2 - (b[0] + b[2]) / 2
        dy = (a[1] + a[3]) / 2 - (b[1] + b[3]) / 2
        center_sq = dx ** 2 + dy ** 2

        # Squared diagonal of the enclosing box.
        span_x = max(a[2], b[2]) - min(a[0], b[0])
        span_y = max(a[3], b[3]) - min(a[1], b[1])
        diag_sq = span_x ** 2 + span_y ** 2

        if diag_sq == 0:
            return overlap
        return overlap - center_sq / diag_sq

    if len(boxes) == 0:
        return []

    queue = sorted(range(len(scores)), key=scores.__getitem__, reverse=True)
    selected = []
    while queue:
        head = queue.pop(0)
        selected.append(head)
        head_box = boxes[head]
        queue = [k for k in queue if _diou(head_box, boxes[k]) < iou_threshold]
    return selected
def adaptive_nms(boxes, scores, labels, iou_threshold=0.5):
    """Class-aware NMS: boxes only suppress other boxes with the same label.

    Boxes are visited in descending score order. A kept box suppresses every
    lower-ranked box that carries the same label and overlaps it with an IoU
    greater than ``iou_threshold``. Boxes with different labels never
    suppress each other.

    Args:
        boxes: Sequence of ``(x1, y1, x2, y2)`` boxes.
        scores: Sequence of confidence scores, one per box.
        labels: Sequence of class labels, one per box.
        iou_threshold: IoU above which a same-label box is suppressed.

    Returns:
        Indices of the kept boxes, in descending score order.
    """
    order = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)
    suppressed = [False] * len(boxes)
    keep = []

    # `rank` is the position in the sorted order and `i` is the box index.
    # The lower-ranked boxes are order[rank + 1:]; slicing by the box index
    # instead would skip or re-check the wrong boxes.
    for rank, i in enumerate(order):
        if suppressed[i]:
            continue
        keep.append(i)
        for j in order[rank + 1:]:
            if suppressed[j] or labels[i] != labels[j]:
                continue
            if calculate_iou(boxes[i], boxes[j]) > iou_threshold:
                suppressed[j] = True
    return keep
def nms_variants_comparison():
    """Print a one-line description of each NMS variant."""
    variants = (
        ("Standard NMS", "传统NMS,硬性删除重叠框"),
        ("Soft NMS", "通过降低分数而非删除处理重叠"),
        ("DIoU NMS", "考虑中心点距离,更适合密集场景"),
        ("Adaptive NMS", "根据不同类别调整策略"),
        ("Cluster NMS", "对相关框进行聚类而非抑制"),
    )
    print("NMS算法变体对比:")
    for name, summary in variants:
        print("• {}: {}".format(name, summary))
nms_variants_comparison()
#5. 锚框机制
#5.1 锚框基本概念
锚框是现代目标检测算法中的重要概念,特别是在两阶段检测器中。
"""
锚框 (Anchor Boxes) - 预定义的参考框
概念:
- 在特征图的每个位置放置多个不同尺寸和比例的参考框
- 用于预测相对于这些参考框的偏移量
- 解决了多尺度检测问题
参数:
- 尺寸 (Scale): 锚框的大小
- 比例 (Aspect Ratio): 宽高比,如1:1, 1:2, 2:1
- 位置 (Location): 在特征图上的坐标
优势:
- 提高检测精度
- 处理多尺度对象
- 加速训练过程
"""
class AnchorGenerator:
    """Generate dense anchor boxes over a feature-map grid."""

    def __init__(self, scales, aspect_ratios, strides):
        """Store the anchor configuration.

        Args:
            scales: Anchor sizes in pixels, e.g. ``[32, 64, 128]``. Each
                anchor has area ``scale ** 2``.
            aspect_ratios: Ratios, e.g. ``[0.5, 1.0, 2.0]``. As implemented
                in ``generate_anchors_single_level`` a ratio ``r`` produces
                an anchor with ``height / width == r``.
            strides: Strides of the feature levels. Only stored here;
                ``generate_anchors_single_level`` takes its stride as an
                explicit argument.
        """
        self.scales = scales
        self.aspect_ratios = aspect_ratios
        self.strides = strides

    def generate_anchors_single_level(self, feat_h, feat_w, stride):
        """Generate all anchors for one feature level.

        Args:
            feat_h, feat_w: Height and width of the feature map.
            stride: Pixel distance between neighbouring grid locations.

        Returns:
            Float tensor of shape
            ``(feat_h * feat_w * len(aspect_ratios) * len(scales), 4)`` in
            ``(x1, y1, x2, y2)`` form. Rows are ordered by grid location
            (row-major), then by aspect ratio, then by scale.
        """
        # Imported here because the module does not import torch at top level;
        # callers are expected to handle ImportError when torch is missing.
        import torch

        # Grid of anchor centres, one per feature-map location.
        shifts_x = torch.arange(0, feat_w * stride, step=stride, dtype=torch.float32)
        shifts_y = torch.arange(0, feat_h * stride, step=stride, dtype=torch.float32)
        # Explicit "ij" indexing keeps the historical default and avoids the
        # warning newer torch versions emit when `indexing` is omitted.
        shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x, indexing="ij")
        shifts = torch.stack((shift_x, shift_y, shift_x, shift_y), dim=2)

        # Base anchors centred on the origin. Side lengths scale with
        # `scale` (not `scale ** 2`) so that width * height == scale ** 2
        # for every aspect ratio.
        scales = torch.as_tensor(self.scales, dtype=torch.float32)
        ratios = torch.as_tensor(self.aspect_ratios, dtype=torch.float32)
        h_ratios = torch.sqrt(ratios)
        w_ratios = 1.0 / h_ratios
        ws = (w_ratios[:, None] * scales[None, :]).reshape(-1)
        hs = (h_ratios[:, None] * scales[None, :]).reshape(-1)
        base_anchors = torch.stack([-ws, -hs, ws, hs], dim=1) / 2

        # Broadcast (locations, 1, 4) + (1, anchors, 4) -> (locations, anchors, 4).
        anchors = shifts.reshape(-1, 1, 4) + base_anchors[None, :, :]
        return anchors.reshape(-1, 4)
def anchor_generation_demo():
    """Print anchor statistics for a 50x50 feature map, or a fallback note.

    When PyTorch can be imported, anchors are generated with AnchorGenerator
    and their count and configuration are printed. On ImportError a short
    textual explanation of anchors is printed instead.
    """
    try:
        import torch  # availability probe; an ImportError selects the fallback

        print("锚框生成演示:")

        generator = AnchorGenerator(
            scales=[32, 64, 128],
            aspect_ratios=[0.5, 1.0, 2.0],
            strides=[16],
        )
        # Example level: 50x50 feature map with stride 16.
        grid_anchors = generator.generate_anchors_single_level(50, 50, 16)
        per_location = len(generator.scales) * len(generator.aspect_ratios)

        print(f"在50x50特征图上生成了 {len(grid_anchors)} 个锚框")
        print(f"每个位置有 {per_location} 个锚框")
        print(f"锚框尺寸: {generator.scales}")
        print(f"宽高比: {generator.aspect_ratios}")
    except ImportError:
        # NOTE(review): the listing lost its indentation; the concept summary
        # below is assumed to belong to this fallback branch - confirm.
        for line in (
            "PyTorch未安装,跳过锚框生成演示",
            "锚框概念:",
            "• 在特征图每个位置放置多个预定义框",
            "• 不同尺寸和比例适应不同目标",
            "• 用于预测相对偏移而非绝对坐标",
        ):
            print(line)
anchor_generation_demo()
#5.2 锚框与预测
def anchor_matching(gt_boxes, anchors, threshold=0.7):
    """Match each ground-truth box to its best-overlapping anchor.

    For every ground-truth box the anchor with the highest IoU is found; the
    pair is reported only when that IoU reaches ``threshold``.

    Args:
        gt_boxes: Sequence of ground-truth ``(x1, y1, x2, y2)`` boxes.
        anchors: Sequence of anchor ``(x1, y1, x2, y2)`` boxes.
        threshold: Minimum IoU for a match to be reported.

    Returns:
        List of ``(gt_box, anchor, iou)`` tuples, one per matched
        ground-truth box, in the order of ``gt_boxes``. Empty when there are
        no anchors.
    """
    matches = []
    # Without anchors there is nothing to match against; taking max() over an
    # empty range would raise ValueError.
    if len(anchors) == 0:
        return matches

    for gt_box in gt_boxes:
        ious = [calculate_iou(gt_box, anchor) for anchor in anchors]
        best_idx = max(range(len(ious)), key=ious.__getitem__)
        best_iou = ious[best_idx]
        if best_iou >= threshold:
            matches.append((gt_box, anchors[best_idx], best_iou))
    return matches
def bbox_regression_targets(anchor, gt_box):
    """Encode a ground-truth box relative to an anchor.

    Args:
        anchor: Anchor box ``(x1, y1, x2, y2)``.
        gt_box: Ground-truth box ``(x1, y1, x2, y2)``.

    Returns:
        Tuple ``(dx, dy, dw, dh)``: the centre offsets divided by the anchor
        width/height, and the natural log of the width and height ratios.
    """

    def _center_size(box):
        # (center_x, center_y, width, height) of a corner-format box.
        return (
            (box[0] + box[2]) / 2,
            (box[1] + box[3]) / 2,
            box[2] - box[0],
            box[3] - box[1],
        )

    a_cx, a_cy, a_w, a_h = _center_size(anchor)
    g_cx, g_cy, g_w, g_h = _center_size(gt_box)

    return (
        (g_cx - a_cx) / a_w,
        (g_cy - a_cy) / a_h,
        math.log(g_w / a_w),
        math.log(g_h / a_h),
    )
def anchor_based_detection():
    """Print the anchor-based detection pipeline and a sample regression target."""
    pipeline = (
        "基于锚框的检测流程:",
        "1. 在特征图上生成密集锚框",
        "2. 使用CNN预测每个锚框的:",
        " • 对象性分数 (objectness)",
        " • 边界框偏移 (bbox regression)",
        " • 类别概率 (classification)",
        "3. 根据阈值筛选正负样本",
        "4. 使用NMS去除重复检测",
    )
    for line in pipeline:
        print(line)

    # Worked example: the anchor is the ground-truth box shifted by 5 pixels.
    anchor = (45, 45, 145, 145)
    gt_box = (50, 50, 150, 150)
    dx, dy, dw, dh = bbox_regression_targets(anchor, gt_box)

    print(f"\n回归目标示例:")
    print(f"锚框: {anchor}")
    print(f"真实框: {gt_box}")
    print(f"回归目标: dx={dx:.3f}, dy={dy:.3f}, dw={dw:.3f}, dh={dh:.3f}")
anchor_based_detection()
#6. 目标检测评估指标
#6.1 常用评估指标
"""
目标检测评估指标:
1. mAP (mean Average Precision):
- 所有类别的平均精度均值
- 最重要的评估指标
2. AP (Average Precision):
- 某个类别的平均精度
- 基于Precision-Recall曲线
3. IoU Threshold:
- 通常使用0.5或0.7作为阈值
- COCO使用0.5:0.95的多个阈值
4. Recall:
- 检测到的正样本 / 所有正样本
"""
def calculate_ap(recalls, precisions):
    """Compute Average Precision with 11-point interpolation.

    For each recall level ``t`` in ``0.0, 0.1, ..., 1.0`` the highest
    precision among points whose recall is at least ``t`` is taken (``0.0``
    when there is none); the eleven values are averaged.

    Args:
        recalls: Recall values of the precision-recall curve.
        precisions: Precision values, aligned with ``recalls``.

    Returns:
        The interpolated average precision.
    """
    points = list(zip(recalls, precisions))
    total = 0.0
    for step in range(11):
        level = step / 10.0
        # The leading 0.0 is the floor used when no point reaches this level.
        best = max([0.0] + [p for r, p in points if r >= level])
        total += best
    return total / 11
def evaluate_detection(predictions, ground_truths, iou_threshold=0.5):
    """Evaluate detections against ground truth and report per-class AP and mAP.

    For each class present in the ground truth, predictions of that class
    are visited in descending score order. Each prediction is compared with
    the ground-truth boxes of the class that are still unmatched; it counts
    as a true positive when its best IoU reaches ``iou_threshold`` (that
    ground-truth box is then consumed), otherwise as a false positive. AP is
    computed from the resulting precision/recall points by ``calculate_ap``.

    Args:
        predictions: Iterable of ``(bbox, class, score)`` tuples.
        ground_truths: Iterable of ``(bbox, class)`` tuples.
        iou_threshold: Minimum IoU for a prediction to match a ground truth.

    Returns:
        Dict with keys ``'mAP'`` (mean of the per-class APs, ``0.0`` when
        there are no classes), ``'APs'`` (class -> AP) and ``'num_classes'``.
        Classes that appear only in ``predictions`` are ignored; classes with
        ground truth but no predictions get AP ``0.0``.
    """
    # Group boxes by class.
    gt_by_class = {}
    for bbox, cls in ground_truths:
        gt_by_class.setdefault(cls, []).append(bbox)

    pred_by_class = {}
    for bbox, cls, score in predictions:
        pred_by_class.setdefault(cls, []).append((bbox, score))

    aps = {}
    for cls, gt_boxes in gt_by_class.items():
        class_preds = pred_by_class.get(cls)
        if not class_preds:
            aps[cls] = 0.0
            continue

        # Highest-confidence predictions claim ground-truth boxes first.
        ranked = sorted(class_preds, key=lambda item: item[1], reverse=True)

        matched_gt = set()
        true_positives = 0  # running count, avoids re-summing a prefix per step
        precisions = []
        recalls = []
        for rank, (pred_box, _score) in enumerate(ranked, start=1):
            best_iou = 0
            best_gt = -1
            for j, gt_box in enumerate(gt_boxes):
                if j in matched_gt:
                    continue
                iou = calculate_iou(pred_box, gt_box)
                if iou > best_iou:
                    best_iou = iou
                    best_gt = j

            # best_gt stays -1 when no unmatched ground truth overlaps at
            # all; such a prediction is never a true positive, even with a
            # non-positive threshold.
            if best_gt >= 0 and best_iou >= iou_threshold:
                matched_gt.add(best_gt)
                true_positives += 1

            # Every prediction is either TP or FP, so TP + FP == rank.
            precisions.append(true_positives / rank)
            recalls.append(true_positives / len(gt_boxes))

        aps[cls] = calculate_ap(recalls, precisions)

    mAP = sum(aps.values()) / len(aps) if aps else 0.0
    return {
        'mAP': mAP,
        'APs': aps,
        'num_classes': len(aps)
    }
def evaluation_metrics_summary():
    """Print the common detection metrics with a one-line description each."""
    names = ["mAP@0.5", "mAP@0.7", "mAP@0.5:0.95", "AP50", "AP75", "AR"]
    meanings = [
        "IoU阈值为0.5的平均精度",
        "IoU阈值为0.7的平均精度",
        "COCO标准,多个IoU阈值的平均",
        "IoU=0.5时的AP",
        "IoU=0.75时的AP",
        "Average Recall (平均召回率)",
    ]
    print("目标检测评估指标:")
    for name, meaning in zip(names, meanings):
        print("• " + name + ": " + meaning)
evaluation_metrics_summary()
#相关教程
#7. 总结
目标检测理论包含了计算机视觉中的一些核心概念:
关键技术:
- 边界框表示:多种格式及其相互转换
- IOU计算:评估框的重叠程度
- NMS算法:去除重复检测
- 锚框机制:预定义参考框
核心要点:
- 理解不同边界框格式的转换
- 掌握IOU的计算方法和应用
- 熟悉NMS算法的原理和变体
- 了解锚框在现代检测器中的作用
💡 重要提醒:IOU和NMS是目标检测算法的核心,几乎所有的现代检测器都依赖这些基础概念。深入理解它们对于掌握目标检测技术至关重要。
🔗 扩展阅读

