函数compute_loss
# Accumulate the three loss terms over the three output scales.
# pred_result interleaves the outputs per scale: index 2*i is the raw conv
# output and index 2*i + 1 is the decoded prediction.
for i in range(3):
    conv = pred_result[2 * i]
    pred = pred_result[2 * i + 1]
    # target[i] is unpacked into the (label, bboxes) arguments of compute_loss.
    loss_items = compute_loss(pred, conv, *target[i], i)
    giou_loss += loss_items[0]
    conf_loss += loss_items[1]
    prob_loss += loss_items[2]
def compute_loss(pred, conv, label, bboxes, i=0):
    """Compute the GIoU, confidence and class losses for one output scale.

    Args:
        pred: Decoded predictions, indexed here as a rank-5 tensor whose last
            axis holds the box (0:4) and the objectness score (4:5).
        conv: Raw convolution output of the same scale; reshaped here to
            (batch, size, size, 3, 5 + NUM_CLASS). Its slices are used as
            logits.
        label: Ground-truth tensor, indexed as rank 5: box in 0:4, objectness
            mask in 4:5, class targets from 5 onwards.
        bboxes: Ground-truth boxes, indexed as a rank-3 tensor
            (assumed (batch, max_boxes, 4) -- TODO confirm against the
            data pipeline).
        i: Index into STRIDES selecting the scale being processed.

    Returns:
        Tuple ``(giou_loss, conf_loss, prob_loss)``. Each term is summed over
        axes 1-4 and then averaged over the remaining (batch) axis.
    """
    shape = tf.shape(conv)
    batch_size, output_size = shape[0], shape[1]
    # Network input resolution = stride of this scale * feature-map size.
    input_size = tf.cast(STRIDES[i] * output_size, tf.float32)

    conv = tf.reshape(conv, (batch_size, output_size, output_size, 3, 5 + NUM_CLASS))
    # Raw (pre-activation) objectness and class logits.
    raw_conf = conv[..., 4:5]
    raw_prob = conv[..., 5:]

    # Predicted box coordinates and predicted objectness score.
    pred_xywh = pred[:, :, :, :, 0:4]
    pred_conf = pred[:, :, :, :, 4:5]

    # Ground truth: box coordinates, objectness mask and class targets.
    label_xywh = label[:, :, :, :, 0:4]
    obj_mask = label[:, :, :, :, 4:5]  # presumably 1 where an object exists, else 0
    label_prob = label[:, :, :, :, 5:]

    # (1) Box regression loss, only counted where obj_mask is non-zero.
    giou = tf.expand_dims(bbox_giou(pred_xywh, label_xywh), axis=-1)
    label_w = label_xywh[:, :, :, :, 2:3]
    label_h = label_xywh[:, :, :, :, 3:4]
    # Weight in (1, 2]: the smaller the ground-truth box relative to the
    # input area, the larger the weight.
    bbox_loss_scale = 2.0 - label_w * label_h / (input_size ** 2)
    giou_loss = obj_mask * bbox_loss_scale * (1 - giou)

    # IoU of every predicted box against every ground-truth box of the same
    # image. The inserted axes make the operands broadcast against each other:
    # pred_xywh gains one axis before the coordinates, bboxes gains three axes
    # after the batch axis, so the result has one IoU per
    # (prediction, ground-truth box) pair.
    iou = bbox_iou(pred_xywh[:, :, :, :, np.newaxis, :],
                   bboxes[:, np.newaxis, np.newaxis, np.newaxis, :, :])
    max_iou = tf.reduce_max(iou, axis=-1, keepdims=True)
    # Background = no object assigned AND best IoU below the ignore threshold.
    # Predictions that overlap some ground-truth box well enough are ignored.
    bgd_mask = (1.0 - obj_mask) * tf.cast(max_iou < IOU_LOSS_THRESH, tf.float32)

    # (2) Confidence loss. The focal-style weight uses the decoded score
    # pred_conf, while the cross-entropy uses the raw logits raw_conf.
    # NOTE(review): pred_conf is presumably sigmoid(raw_conf), computed in the
    # decode step that is not shown here -- confirm.
    conf_focal = tf.pow(obj_mask - pred_conf, 2)
    conf_bce = tf.nn.sigmoid_cross_entropy_with_logits(labels=obj_mask, logits=raw_conf)
    conf_loss = conf_focal * (obj_mask * conf_bce + bgd_mask * conf_bce)

    # (3) Class-probability loss, only counted where obj_mask is non-zero.
    prob_loss = obj_mask * tf.nn.sigmoid_cross_entropy_with_logits(
        labels=label_prob, logits=raw_prob)

    def _per_image_mean(loss):
        # Sum over axes 1-4, then average over the remaining axis.
        return tf.reduce_mean(tf.reduce_sum(loss, axis=[1, 2, 3, 4]))

    return (_per_image_mean(giou_loss),
            _per_image_mean(conf_loss),
            _per_image_mean(prob_loss))
1、tf.reshape
给定张量 tensor,此操作返回一个新的 tf.Tensor:其元素值及元素顺序与 tensor 相同,但形状变为参数 shape 指定的新形状。
tf.reshape(
tensor, shape, name=None
)
t1 = [[1, 2, 3],
[4, 5, 6]]
print(tf.shape(t1).numpy())
[2 3]
t2 = tf.reshape(t1, [6])
t2
<tf.Tensor: id=8, shape=(6,), dtype=int32, numpy=array([1, 2, 3, 4, 5, 6], dtype=int32)>
2、tf.expand_dims
返回一个新张量:在原张量形状的索引 axis 处插入一个长度为 1 的轴。
tf.expand_dims(
input, axis, name=None
)
给定张量 input,该操作在 input 形状的维度索引 axis 处插入一个长度为 1 的维度。维度索引遵循 Python 索引规则:从零开始计数;若为负索引,则从末尾开始倒数。
image = tf.zeros([10,10,3])
tf.expand_dims(image, axis=0).shape.as_list()
[1, 10, 10, 3]
tf.expand_dims(image, axis=1).shape.as_list()
[10, 1, 10, 3]
tf.expand_dims(image, -1).shape.as_list()
[10, 10, 3, 1]
3、tf.math.reduce_max
计算张量维度上的元素最大值。
x = tf.constant([5, 1, 2, 4])
tf.reduce_max(x,axis=-1)
<tf.Tensor: id=14, shape=(), dtype=int32, numpy=5>
x = tf.constant([-5, -1, -2, -4])
tf.reduce_max(x)
<tf.Tensor: id=22, shape=(), dtype=int32, numpy=-1>
4、tf.math.pow
计算一个值对另一个值的幂。
tf.math.pow(
x, y, name=None
)
x = tf.constant([[2, 2], [3, 3]])
y = tf.constant([[8, 16], [2, 3]])
tf.pow(x, y) # [[256, 65536], [9, 27]]
<tf.Tensor: id=26, shape=(2, 2), dtype=int32, numpy=
array([[ 256, 65536],
[ 9, 27]], dtype=int32)>
5、tf.nn.sigmoid_cross_entropy_with_logits
计算给定 logits 的 sigmoid 交叉熵(logits 为未经过 sigmoid 的原始输出,labels 与 logits 的形状和类型必须相同)。
tf.nn.sigmoid_cross_entropy_with_logits(
labels=None, logits=None, name=None
)
6、tf.math.reduce_sum
计算跨张量维度的元素之和。
x = tf.constant([[1, 1, 1], [1, 1, 1]])
tf.reduce_sum(x).numpy()
6
7、tf.math.reduce_mean
计算跨张量维度的元素的均值。
函数bbox_giou
pred_xywh = pred[:, :, :, :, 0:4]
label_xywh = label[:, :, :, :, 0:4]
bbox_giou(pred_xywh, label_xywh)
def bbox_giou(boxes1, boxes2):
    """Return the Generalized IoU (GIoU) between two sets of boxes.

    Args:
        boxes1: Tensor whose last axis is (x_center, y_center, width, height).
        boxes2: Tensor in the same format; leading axes must be
            broadcast-compatible with those of ``boxes1``.

    Returns:
        Tensor of GIoU values with the last (coordinate) axis removed.
        Where the union area or the enclosing area is zero, the affected
        quotient is defined as 0 instead of NaN/inf, so degenerate boxes
        cannot poison a loss that is later masked by multiplication.
    """
    def _to_ordered_corners(xywh):
        # (x, y, w, h) -> (x - w/2, y - h/2, x + w/2, y + h/2)
        half_wh = xywh[..., 2:] * 0.5
        corners = tf.concat([xywh[..., :2] - half_wh,
                             xywh[..., :2] + half_wh], axis=-1)
        # If a width or height is negative the two corners come out swapped;
        # min/max restores (left-up, right-down) order so the areas computed
        # below are never negative.
        return tf.concat([tf.minimum(corners[..., :2], corners[..., 2:]),
                          tf.maximum(corners[..., :2], corners[..., 2:])], axis=-1)

    boxes1 = _to_ordered_corners(boxes1)
    boxes2 = _to_ordered_corners(boxes2)

    # Areas of the two boxes.
    boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
    boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])

    # Intersection rectangle: left-up and right-down corners.
    left_up = tf.maximum(boxes1[..., :2], boxes2[..., :2])
    right_down = tf.minimum(boxes1[..., 2:], boxes2[..., 2:])
    # For non-overlapping boxes right_down - left_up is negative; clamp to 0.
    inter_section = tf.maximum(right_down - left_up, 0.0)
    inter_area = inter_section[..., 0] * inter_section[..., 1]

    union_area = boxes1_area + boxes2_area - inter_area
    # divide_no_nan returns 0 where the denominator is 0.
    iou = tf.math.divide_no_nan(inter_area, union_area)

    # Smallest axis-aligned box C that encloses both boxes.
    enclose_left_up = tf.minimum(boxes1[..., :2], boxes2[..., :2])
    enclose_right_down = tf.maximum(boxes1[..., 2:], boxes2[..., 2:])
    enclose = tf.maximum(enclose_right_down - enclose_left_up, 0.0)
    enclose_area = enclose[..., 0] * enclose[..., 1]

    # GIoU = IoU - (area(C) - area(union)) / area(C)
    giou = iou - tf.math.divide_no_nan(enclose_area - union_area, enclose_area)
    return giou
1、tf.math.minimum
逐元素比较 x 和 y,返回其中的较小值(支持广播;形状不兼容时会报错)。
tf.math.minimum(
x, y, name=None
)
x = tf.constant([0., 0., 0., 0.])
y = tf.constant([-5., -2., 0., 3.])
tf.math.minimum(x, y)
<tf.Tensor: id=31, shape=(4,), dtype=float32, numpy=array([-5., -2., 0., 0.], dtype=float32)>
x = tf.constant([-5., 0., 0., 0.])
y = tf.constant([-3.])
tf.math.minimum(x, y)
<tf.Tensor: id=35, shape=(4,), dtype=float32, numpy=array([-5., -3., -3., -3.], dtype=float32)>
x = tf.constant([-5., 0., 0., 0.])
y = tf.constant([-3., -1.])
tf.math.minimum(x, y)
InvalidArgumentError: Incompatible shapes: [4] vs. [2] [Op:Minimum]
2、tf.convert_to_tensor
将给定的 value 转换为 Tensor。
tf.convert_to_tensor(
value, dtype=None, dtype_hint=None, name=None
)
此函数将各种类型的Python对象转换为Tensor 对象。它接受Tensor对象,numpy数组,Python列表和Python标量
扫描二维码关注公众号,回复:
12889086 查看本文章
![](/qrcode.jpg)
import numpy as np
def my_func(arg):
    """Return *arg* converted to a float32 tensor."""
    return tf.convert_to_tensor(arg, dtype=tf.float32)
value_1 = my_func(tf.constant([[1.0, 2.0], [3.0, 4.0]]))
print(value_1)
tf.Tensor(
[[1. 2.]
[3. 4.]], shape=(2, 2), dtype=float32)
value_2 = my_func([[1.0, 2.0], [3.0, 4.0]])
print(value_2)
tf.Tensor(
[[1. 2.]
[3. 4.]], shape=(2, 2), dtype=float32)
value_3 = my_func(np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32))
print(value_3)
tf.Tensor(
[[1. 2.]
[3. 4.]], shape=(2, 2), dtype=float32)
函数bbox_iou
iou = bbox_iou(pred_xywh[:, :, :, :, np.newaxis, :],
bboxes[:, np.newaxis, np.newaxis, np.newaxis, :, :])
def bbox_iou(boxes1, boxes2):
    """Return the IoU between two sets of boxes.

    Args:
        boxes1: Tensor whose last axis is (x_center, y_center, width, height).
        boxes2: Tensor in the same format; leading axes must be
            broadcast-compatible with those of ``boxes1``.

    Returns:
        Tensor of IoU values with the last (coordinate) axis removed.
        Where the union area is zero the IoU is defined as 0 instead of
        NaN/inf.
    """
    # Areas are taken directly as width * height (no corner re-ordering).
    boxes1_area = boxes1[..., 2] * boxes1[..., 3]
    boxes2_area = boxes2[..., 2] * boxes2[..., 3]

    # (x, y, w, h) -> (x - w/2, y - h/2, x + w/2, y + h/2)
    boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5,
                        boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1)
    boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5,
                        boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1)

    # Intersection rectangle; clamp to 0 when the boxes do not overlap.
    left_up = tf.maximum(boxes1[..., :2], boxes2[..., :2])
    right_down = tf.minimum(boxes1[..., 2:], boxes2[..., 2:])
    inter_section = tf.maximum(right_down - left_up, 0.0)
    inter_area = inter_section[..., 0] * inter_section[..., 1]

    union_area = boxes1_area + boxes2_area - inter_area
    # divide_no_nan returns 0 where the denominator is 0, so two zero-area
    # boxes give IoU 0 rather than NaN.
    return tf.math.divide_no_nan(inter_area, union_area)