# CV-1-目标检测-03-SS-02-selectivesearch

# -*- coding: utf-8 -*-

import cv2 as cv
import skimage.io
import skimage.feature
import skimage.color
import skimage.transform
import skimage.util
import skimage.segmentation
import numpy


# "Selective Search for Object Recognition" by J.R.R. Uijlings et al.
#
#  - Modified version with LBP extractor for texture vectorization


def _generate_segments(im_orig, scale, sigma, min_size):
    """
        Over-segment the image with the Felzenszwalb-Huttenlocher algorithm.

        Returns a new array made of ``im_orig`` plus one extra trailing
        plane; channel index 3 of the result is set to the per-pixel
        segment label.
    """
    # scale, sigma and min_size are forwarded to felzenszwalb unchanged
    float_image = skimage.util.img_as_float(im_orig)
    label_map = skimage.segmentation.felzenszwalb(
        float_image, scale=scale, sigma=sigma, min_size=min_size)

    # widen the image by one zero-filled plane, then store the labels in it
    blank_plane = numpy.zeros(im_orig.shape[:2])[:, :, numpy.newaxis]
    labelled = numpy.append(im_orig, blank_plane, axis=2)
    labelled[:, :, 3] = label_map

    return labelled


def _sim_colour(r1, r2):
    """
        calculate the sum of histogram intersection of colour
    """
    return sum([min(a, b) for a, b in zip(r1["hist_c"], r2["hist_c"])])


def _sim_texture(r1, r2):
    """
        calculate the sum of histogram intersection of texture
    """
    return sum([min(a, b) for a, b in zip(r1["hist_t"], r2["hist_t"])])


def _sim_size(r1, r2, imsize):
    """
        calculate the size similarity over the image
    """
    return 1.0 - (r1["size"] + r2["size"]) / imsize


def _sim_fill(r1, r2, imsize):
    """
        calculate the fill similarity over the image
    """
    bbsize = (
            (max(r1["max_x"], r2["max_x"]) - min(r1["min_x"], r2["min_x"]))
            * (max(r1["max_y"], r2["max_y"]) - min(r1["min_y"], r2["min_y"]))
    )
    return 1.0 - (bbsize - r1["size"] - r2["size"]) / imsize


def _calc_sim(r1, r2, imsize):
    """
        overall similarity of two regions: colour + texture + size + fill,
        all four terms weighted equally
    """
    colour = _sim_colour(r1, r2)
    texture = _sim_texture(r1, r2)
    size = _sim_size(r1, r2, imsize)
    fill = _sim_fill(r1, r2, imsize)
    return colour + texture + size + fill


def _calc_colour_hist(img):
    """
        calculate colour histogram for each region

        the size of output histogram will be BINS * COLOUR_CHANNELS(3)

        number of bins is 25 as same as [uijlings_ijcv2013_draft.pdf]

        extract HSV
    """

    BINS = 25
    hist = numpy.array([])

    for colour_channel in (0, 1, 2):
        # extracting one colour channel
        c = img[:, colour_channel]

        # calculate histogram for each colour and join to the result
        hist = numpy.concatenate(
            [hist] + [numpy.histogram(c, BINS, (0.0, 255.0))[0]])

    # L1 normalize
    hist = hist / len(img)

    return hist


def _calc_texture_gradient(img):
    """
        texture descriptor for the entire image

        The original SelectiveSearch paper uses Gaussian derivatives in
        8 orientations; this version substitutes LBP
        (skimage.feature.local_binary_pattern called with 8 and 1.0).

        The output has the same first three dimensions as img; only
        channels 0, 1 and 2 are filled, any further channel stays zero.
    """
    height, width, depth = img.shape[0], img.shape[1], img.shape[2]
    lbp = numpy.zeros((height, width, depth))

    for channel in range(3):
        plane = img[:, :, channel]
        lbp[:, :, channel] = skimage.feature.local_binary_pattern(
            plane, 8, 1.0)

    return lbp


def _calc_texture_hist(img):
    """
        calculate texture histogram for each region

        calculate the histogram of gradient for each colours
        the size of output histogram will be
            BINS * ORIENTATIONS * COLOUR_CHANNELS(3)
    """
    BINS = 10

    hist = numpy.array([])

    for colour_channel in (0, 1, 2):
        # mask by the colour channel
        fd = img[:, colour_channel]

        # calculate histogram for each orientation and concatenate them all
        # and join to the result
        hist = numpy.concatenate(
            [hist] + [numpy.histogram(fd, BINS, (0.0, 1.0))[0]])

    # L1 Normalize
    hist = hist / len(img)

    return hist


def _extract_regions(img):
    """
        build the initial region table from a labelled image

        img is the [height, width, 4] array produced by
        _generate_segments: channels 0-2 are the colour planes and
        channel 3 holds the segment label of every pixel.

        Returns a dict keyed by label. Every value is a dict with
            "min_x", "min_y", "max_x", "max_y" : bounding box (pixel indices)
            "labels" : 1 (number of initial segments in the region)
            "size"   : number of pixels carrying the label (the area of
                       the segment itself, not of its bounding box)
            "hist_c" : colour histogram from _calc_colour_hist
            "hist_t" : texture histogram from _calc_texture_hist
    """
    label_map = img[:, :, 3]

    # colour features are taken from the HSV version of the colour planes
    hsv = skimage.color.rgb2hsv(img[:, :, :3])

    # texture response of the whole image, computed once
    tex_grad = _calc_texture_gradient(img)

    # Visit labels in order of first appearance in a row-major scan so the
    # dict keeps the same insertion order as a per-pixel scan would give
    # (later stages iterate this dict, so the order matters).
    labels, first_seen = numpy.unique(label_map, return_index=True)

    R = {}
    for label in labels[numpy.argsort(first_seen)]:
        mask = label_map == label
        ys, xs = numpy.nonzero(mask)

        # pixels of this region as an [n_pixels, 3] array
        masked_pixels = hsv[mask]

        R[label] = {
            "min_x": int(xs.min()), "min_y": int(ys.min()),
            "max_x": int(xs.max()), "max_y": int(ys.max()),
            "labels": 1,
            "size": len(masked_pixels),
            "hist_c": _calc_colour_hist(masked_pixels),
            "hist_t": _calc_texture_hist(tex_grad[mask]),
        }

    return R


def _extract_neighbours(regions):
    def intersect(a, b):
        if (a["min_x"] <= b["min_x"] <= a["max_x"]
            and a["min_y"] <= b["min_y"] <= a["max_y"]) or (
                a["min_x"] <= b["max_x"] <= a["max_x"]
                and a["min_y"] <= b["max_y"] <= a["max_y"]) or (
                a["min_x"] <= b["min_x"] <= a["max_x"]
                and a["min_y"] <= b["max_y"] <= a["max_y"]) or (
                a["min_x"] <= b["max_x"] <= a["max_x"]
                and a["min_y"] <= b["min_y"] <= a["max_y"]):
            return True
        return False

    R = regions.items()
    r = [elm for elm in R]
    R = r
    neighbours = []
    for cur, a in enumerate(R[:-1]):
        for b in R[cur + 1:]:
            if intersect(a[1], b[1]):
                neighbours.append((a, b))

    return neighbours


def _merge_regions(r1, r2):
    new_size = r1["size"] + r2["size"]
    rt = {
        "min_x": min(r1["min_x"], r2["min_x"]),
        "min_y": min(r1["min_y"], r2["min_y"]),
        "max_x": max(r1["max_x"], r2["max_x"]),
        "max_y": max(r1["max_y"], r2["max_y"]),
        "size": new_size,
        "hist_c": (
                          r1["hist_c"] * r1["size"] + r2["hist_c"] * r2["size"]) / new_size,
        "hist_t": (
                          r1["hist_t"] * r1["size"] + r2["hist_t"] * r2["size"]) / new_size,
        "labels": r1["labels"] + r2["labels"]  # 代表该框合并过1次。
    }
    return rt


def selective_search(im_orig, scale=1.0, sigma=0.8, min_size=50):
    '''Selective Search

    Over-segments the image, then repeatedly merges the most similar pair
    of neighbouring regions until no candidate pair is left, and reports
    the bounding box of every region seen along the way.

    Parameters
    ----------
        im_orig : ndarray
            Input image; must have exactly 3 channels (shape[2] == 3).
        scale : float
            Free parameter. Higher means larger clusters in felzenszwalb segmentation.
        sigma : float
            Width of Gaussian kernel for felzenszwalb segmentation.
        min_size : int
            Minimum component size for felzenszwalb segmentation.
    Returns
    -------
        img : ndarray
            image with region label
            region label is stored in the 4th value of each pixel [r,g,b,(region)]
        regions : list of dict
            One entry per region, initial segments and merged regions alike:
            [
                {
                    'rect': (x, y, w, h),   # min_x, min_y, max_x - min_x, max_y - min_y
                    'size': number of pixels in the region,
                    'labels': number of initial segments merged into the region
                },
                ...
            ]
    '''
    # The input image must have exactly 3 channels.
    assert im_orig.shape[2] == 3, "3channels image is expected"

    # step 1: get the smallest regions with felzenszwalb.
    # The result has the same height and width as the input but 4 channels;
    # the region label is stored in the 4th value of each pixel
    # [r,g,b,(region)].
    img = _generate_segments(im_orig, scale, sigma, min_size)

    if img is None:
        return None, {}

    # image size = number of pixels
    imsize = img.shape[0] * img.shape[1]
    # step 2: bounding box, size and histograms of every initial region
    # (the segmentation itself only provides the label map)
    R = _extract_regions(img)

    # extract neighbouring information
    neighbours = _extract_neighbours(R)

    # step 3: calculate initial similarities, keyed by the pair of region keys
    S = {}
    for (ai, ar), (bi, br) in neighbours:
        S[(ai, bi)] = _calc_sim(ar, br, imsize)

    # step 4: hierarchical search - merge until no candidate pair is left
    while S != {}:

        # get the pair with the highest similarity (last item after an
        # ascending sort by similarity value)
        i, j = sorted(list(S.items()), key=lambda a: a[1])[-1][0]

        # merge corresponding regions under a fresh key; R[i] and R[j] are
        # kept, so they still show up in the returned regions
        t = max(R.keys()) + 1.0
        R[t] = _merge_regions(R[i], R[j])

        # mark similarities for regions to be removed: every pair that
        # involves i or j, because those two have just been merged
        key_to_delete = []
        for k, v in S.items():
            if (i in k) or (j in k):
                key_to_delete.append(k)

        # remove old similarities of related regions
        for k in key_to_delete:
            del S[k]

        # calculate similarity set with the new region: each former
        # neighbour of i or j becomes a neighbour of t
        for k in filter(lambda a: a != (i, j), key_to_delete):
            # n is the member of the pair that is neither i nor j
            n = k[1] if k[0] in (i, j) else k[0]
            # similarity between the merged region and that neighbour
            S[(t, n)] = _calc_sim(R[t], R[n], imsize)

    # collect the output records; rect is (x, y, width, height)
    regions = []
    for k, r in R.items():
        regions.append({
            'rect': (
                r['min_x'], r['min_y'],
                r['max_x'] - r['min_x'], r['max_y'] - r['min_y']),
            'size': r['size'],
            'labels': r['labels']
        })

    return img, regions


if __name__ == '__main__':
    # Demo: run selective search on a sample image, save every proposal
    # as a crop under ./output and show the boxes and the label map.
    import os

    # alternative sample: './images/000129.jpg'
    img_path = './images/11.png'
    img = cv.imread(img_path)
    if img is None:
        # cv.imread signals an unreadable or missing file by returning None
        raise FileNotFoundError('cannot read image: {}'.format(img_path))

    print("开始ss候选框获取....")
    # OpenCV decodes to BGR, while selective_search converts the colour
    # planes with rgb2hsv, so hand it an RGB copy. Crops and display keep
    # using the BGR original that OpenCV expects.
    img_rgb = cv.cvtColor(img, cv.COLOR_BGR2RGB)
    img_lbl, regions = selective_search(img_rgb, scale=1000, sigma=0.9, min_size=100)
    print(regions)
    print("完成候选框的获取....")
    print(img_lbl.shape)

    # the crops are written here; make sure the directory exists
    os.makedirs('./output', exist_ok=True)

    show_image = img.copy()
    for k, region in enumerate(regions):
        # top-left corner plus width and height of the proposal
        x, y, w, h = (int(v) for v in region['rect'])
        show_image = cv.rectangle(show_image, pt1=(x, y), pt2=(w + x, h + y), color=[0, 255, 0])

        # crop the proposal; a box with zero width or height yields an
        # empty array, which cv.imwrite cannot encode, so skip it
        tmp_img = img[y:y + h, x:x + w, :]
        if tmp_img.size == 0:
            continue
        cv.imwrite('./output/img_{}.jpg'.format(k), tmp_img)

    # imshow maps float images from [0, 1] to [0, 255]; scale the labels
    # into that range so individual segments stay distinguishable
    label_plane = img_lbl[:, :, 3]
    label_max = label_plane.max()
    if label_max > 0:
        label_plane = label_plane / label_max

    cv.imshow('image', img)
    cv.imshow('show_image', show_image)
    cv.imshow('im_mask', label_plane)
    cv.waitKey(0)
    cv.destroyAllWindows()

# 发布了125 篇原创文章 · 获赞 2 · 访问量 2622
#
# 猜你喜欢
#
# 转载自blog.csdn.net/HJZ11/article/details/104734104