My experiment uses a PSPNet model for semantic segmentation of 2D images; the code below starts directly from the trained .h5 file.
Maybe my search skills failed me, but I could not find a ready-to-use semantic segmentation deployment example anywhere online, so I pieced this together from several sources and eventually got it working.
Platform: Windows, VS2015, TensorFlow 1.8 API, Python 3.6
The overall flow is: train the model in Keras --> model.h5 --> convert to a .pb file --> load the .pb in TensorFlow (Python) to verify correctness --> call the .pb file from the TensorFlow C++ API
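This post assumes the .h5 file already exists; training itself is not covered. For reference, a minimal sketch of how the .h5 is typically produced in Keras (here `model` stands for an already-trained keras.models.Model such as the PSPNet from nets.pspnet; the file names are illustrative):

# Sketch only: save the trained Keras model so the conversion script in step 1 can load it.
model.save('./model.h5')                  # architecture + weights in a single .h5
# Or keep them separate (this matches the JSON branch mentioned in step 1):
with open('./model.json', 'w') as f:
    f.write(model.to_json())              # architecture only
model.save_weights('./model_weights.h5')  # weights only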
1. Convert the trained .h5 model to a .pb file
# convert .h5 to .pb
import tensorflow as tf
from tensorflow.python.framework import graph_io
from keras import backend as K
from keras.models import Model, load_model, model_from_json
from nets.pspnet import pspnet
def freeze_session(session, keep_var_names=None, output_names=None, clear_devices=True):
    from tensorflow.python.framework.graph_util import convert_variables_to_constants
    graph = session.graph
    with graph.as_default():
        freeze_var_names = list(set(v.op.name for v in tf.global_variables()).difference(keep_var_names or []))
        output_names = output_names or []
        output_names += [v.op.name for v in tf.global_variables()]
        input_graph_def = graph.as_graph_def()
        if clear_devices:
            for node in input_graph_def.node:
                node.device = ""
        frozen_graph = convert_variables_to_constants(session, input_graph_def, output_names, freeze_var_names)
        return frozen_graph
K.set_learning_phase(0)
keras_model = load_model('./model.h5')
# If the .h5 file does not contain the model architecture, load the JSON first
# and then load the weights:
# json_file = '/model.json'
# with open(json_file, 'r') as f:
#     json_str = f.read()
# keras_model = model_from_json(json_str)
# keras_model.load_weights('./model.h5')

# .inputs and .outputs are important -- write them down, they are needed later
print('Inputs are:', keras_model.inputs)
print('Outputs are:', keras_model.outputs)
# Save the .pb file
frozen_graph = freeze_session(K.get_session(), output_names=[keras_model.output.op.name])
graph_io.write_graph(frozen_graph, "./", "model.pb", as_text=False)
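If you are unsure which names to use in the next step, the frozen graph can also be inspected directly. A small sketch (assuming the model.pb written above; a tensor name is the node name with ':0' appended):

# List every node in the frozen graph so the input/output names used in step 2
# can be confirmed (e.g. node 'input_1' -> tensor name 'input_1:0').
import tensorflow as tf

graph_def = tf.GraphDef()
with tf.gfile.GFile('./model.pb', 'rb') as f:
    graph_def.ParseFromString(f.read())
for node in graph_def.node:
    print(node.name, node.op)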
2. Verify that the .pb file works in Python
import numpy as np
import tensorflow as tf
import cv2
from PIL import Image

# INPUT_TENSOR_NAME and OUTPUT_TENSOR_NAME come from the printout in step 1
INPUT_TENSOR_NAME = 'input_1:0'
OUTPUT_TENSOR_NAME = 'main/truediv:0'
INPUT_SIZE = 473
colors = [(73, 73, 73), (0, 255, 255), (255, 255, 0)]
num_classes = 3

with tf.gfile.FastGFile('./model.pb', "rb") as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
    tf.import_graph_def(graph_def, name="")
# import_graph_def loads the nodes into the default graph, so a plain Session is enough
sess = tf.Session()
def run(image):
    height, width = image.shape[0:2]
    # Normalization matters a lot -- this detail cost me more than a day
    image = (image - image.min()) / (image.max() - image.min())
    resized_image = cv2.resize(image, (INPUT_SIZE, INPUT_SIZE))
    input_x = sess.graph.get_tensor_by_name(INPUT_TENSOR_NAME)
    out_softmax = sess.graph.get_tensor_by_name(OUTPUT_TENSOR_NAME)
    batch_seg_map = sess.run(
        out_softmax,
        feed_dict={input_x: [np.asarray(resized_image)]})
    # batch_seg_map has shape [1, 473, 473, 3]; batch_seg_map[0] is [473, 473, 3]
    # seg_map is the per-pixel prediction
    seg_map = batch_seg_map[0]
    seg_map = seg_map.argmax(axis=-1).reshape([INPUT_SIZE, INPUT_SIZE])
    seg_img = np.zeros((np.shape(seg_map)[0], np.shape(seg_map)[1], 3))
    # Color the mask according to the class index of each pixel in seg_map
    for c in range(num_classes):
        seg_img[:, :, 0] += ((seg_map[:, :] == c) * (colors[c][0])).astype('uint8')
        seg_img[:, :, 1] += ((seg_map[:, :] == c) * (colors[c][1])).astype('uint8')
        seg_img[:, :, 2] += ((seg_map[:, :] == c) * (colors[c][2])).astype('uint8')
    # Resize the mask back to the original image size
    image = cv2.resize(seg_img, (int(width), int(height)))
    return image
input_image = cv2.imread('./img/image.jpg')
seg_map = run(input_image)
cv2.imwrite("./out.jpg", seg_map)
3. Call the .pb file from a C++ program
This part has the most pitfalls; even a tiny mistake can keep you from getting the expected result.
#include <fstream>
#include <utility>
#include <iostream>
#include <vector>
#include <opencv2/opencv.hpp>
#include "tensorflow/core/public/session.h"
#include "tensorflow/core/graph/default_device.h"
using namespace std;
using namespace cv;
#define INPUT_W 473
#define INPUT_H 473
// Convert an OpenCV Mat to a tensorflow::Tensor.
// In Python the image returned by cv2.imread() can be fed to the network after a simple
// np.reshape, but the C++ API only accepts tensors, so the Mat read by OpenCV has to be
// copied into a tensor explicitly.
void CVMat_to_Tensor(cv::Mat& img, tensorflow::Tensor& output_tensor, int input_rows, int input_cols)
{
    Mat resize_img;
    resize(img, resize_img, cv::Size(input_cols, input_rows));
    // Second pitfall: the RGB image has to be normalized
    Mat dst = resize_img.reshape(1, 1);
    for (int i = 0; i < dst.cols; i++) {
        dst.at<float>(0, i) = dst.at<float>(0, i) / 255.0f;
    }
    resize_img = dst.reshape(3, INPUT_H);
    // Wrap the tensor's buffer in a Mat header and copy the image into it
    float* p = output_tensor.flat<float>().data();
    cv::Mat tempMat(input_rows, input_cols, CV_32FC3, p);
    resize_img.convertTo(tempMat, CV_32FC3);
}
// Copy the network output into a Mat. The float scores are kept as CV_32FC3 here;
// converting to 8-bit at this point would destroy the class scores needed for the argmax below.
void tensor2Mat(tensorflow::Tensor& t, cv::Mat& image)
{
    float* p = t.flat<float>().data();
    image = Mat(INPUT_H, INPUT_W, CV_32FC3, p).clone();
}
int main(int argc, char** argv)
{
    /* -------------------- Configuration ------------------------------------ */
    std::string model_path = "./psp_1w_resnet50_wudongjie.pb";  // path to the .pb model
    std::string image_path = "./model/image.jpg";               // test image
    int input_height = INPUT_H;                                 // height of the network input
    int input_width = INPUT_W;                                  // width of the network input
    std::string input_tensor_name = "input_1:0";                // name of the network's input node
    std::string output_tensor_name = "main/truediv:0";          // name of the network's output node
    /* -------------------- Create the session -------------------------------- */
    tensorflow::Session* session;
    tensorflow::Status status = tensorflow::NewSession(tensorflow::SessionOptions(), &session);  // create a new Session
    /* -------------------- Read the model from the .pb file ------------------- */
    tensorflow::GraphDef graphdef;  // graph definition of the current model
    tensorflow::Status status_load = tensorflow::ReadBinaryProto(tensorflow::Env::Default(), model_path, &graphdef);  // read the graph from the .pb file
    if (!status_load.ok())  // if loading failed, print the error and exit
    {
        std::cout << "ERROR: Loading model failed..." << model_path << std::endl;
        std::cout << status_load.ToString() << "\n";
        return -1;
    }
    tensorflow::Status status_create = session->Create(graphdef);  // import the graph into the Session
    if (!status_create.ok())  // if importing failed, print the error and exit
    {
        std::cout << "ERROR: Creating graph in session failed..." << status_create.ToString() << std::endl;
        return -1;
    }
    std::cout << "<------Successfully created session and loaded graph------>" << std::endl;
    /* -------------------- Load the test image -------------------------------- */
    cv::Mat img = cv::imread(image_path, -1);  // read the image unchanged (keep all channels)
    if (img.empty())
    {
        std::cout << "can't open the image!!!!!" << std::endl;
        return -1;
    }
    img.convertTo(img, CV_32FC3);  // first small pitfall: the whole pipeline works with float data
    // Create the tensor that is fed to the network
    tensorflow::Tensor resized_tensor(tensorflow::DT_FLOAT, tensorflow::TensorShape({ 1, input_height, input_width, 3 }));
    // Copy the Mat read by OpenCV into the tensor
    CVMat_to_Tensor(img, resized_tensor, input_height, input_width);
    std::cout << resized_tensor.DebugString() << std::endl;
    /* -------------------- Run the network ------------------------------------ */
    std::cout << std::endl << "<------------------Running the model with test_image------------------->" << std::endl;
    // Run the forward pass; the output must be a vector of tensors
    std::vector<tensorflow::Tensor> outputs;
    std::string output_node = output_tensor_name;  // output node name
    tensorflow::Status status_run = session->Run({ { input_tensor_name, resized_tensor } }, { output_node }, {}, &outputs);
    if (!status_run.ok())
    {
        std::cout << "ERROR: Run failed..." << std::endl;
        std::cout << status_run.ToString() << std::endl;
        return -1;
    }
    // Inspect the output tensors
    std::cout << "Output tensor size: " << outputs.size() << std::endl;
    for (std::size_t i = 0; i < outputs.size(); i++)
    {
        std::cout << outputs[i].DebugString() << std::endl;
    }
    tensorflow::Tensor t = outputs[0];
    Mat outimage;
    tensor2Mat(t, outimage);
    int output_height = t.shape().dim_size(1);
    int output_width = t.shape().dim_size(2);
    int colors[3][3] = { { 73, 73, 73 }, { 0, 255, 255 }, { 255, 255, 0 } };
    // Per-pixel classification: take the argmax over the three class channels
    // and paint the corresponding color into an 8-bit mask
    Mat segimage(output_height, output_width, CV_8UC3);
    for (int i = 0; i < output_height; i++)
    {
        for (int j = 0; j < output_width; j++)
        {
            Vec3f score = outimage.at<Vec3f>(i, j);
            int index = 0;
            for (int k = 1; k < 3; k++) {
                if (score[k] > score[index]) {
                    index = k;
                }
            }
            segimage.at<Vec3b>(i, j)[0] = (uchar)colors[index][0];
            segimage.at<Vec3b>(i, j)[1] = (uchar)colors[index][1];
            segimage.at<Vec3b>(i, j)[2] = (uchar)colors[index][2];
        }
    }
    // Remember to resize the mask back to the original image size at the end
    resize(segimage, segimage, cv::Size(img.cols, img.rows));
    imshow("img", segimage);
    waitKey();
    return 0;
}
tensorflow::Tensor::DebugString() only prints the first few values of a tensor. To iterate over every element (or to index a single one), map the tensor with auto tensor_map = resized_tensor.tensor<float, 4>(); and loop over its dimensions:
auto tensor_map = resized_tensor.tensor<float, 4>();
for (int y = 0; y < input_height; ++y) {
    for (int x = 0; x < input_width; ++x) {
        for (int c = 0; c < 3; ++c) {
            cout << tensor_map(0, y, x, c) << endl;
        }
    }
}
After repeated testing the code works, but there is still plenty of room for optimization. I plan to rewrite the C++ program with CUDA later, but I don't yet know how to speed up the most time-consuming part, the session->Run prediction call. Suggestions are welcome -- let's improve together.