【学习笔记】pyQt5学习笔记(2)——第一个图像识别demo

最近做的任务是将图像识别一系列的东西进行封装,制作成GUI程序。目前使用的是Google开源的Object Detection API。

上手先实现一个简单的小程序,将上述API中Jupyter Notebook里的识别代码进行封装。实现其实很简单,传递了文件地址,用户输入识别类目及识别图片总数。(这里因为官方的代码需要图片名称为image*.jpg的形式,且通过一个for循环遍历文件夹中的图片,因此需要用户每次都输入确切的图片数目)

软件的整体代码如下:

# -*- coding: utf-8 -*-
# @author:kangshifu
# 20181128版,用户指定欲识别图片数目即可开始识别
# 缺陷:1.对图片文件名称有要求 2.程序会自动运行,没有用户点选按钮的过程
# 3.目前只能读取本地图片,还不能调用摄像头实时识别
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
from distutils.version import StrictVersion
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
from PyQt5 import QtCore, QtGui, QtWidgets
from PyQt5.QtWidgets import *
from PyQt5.QtCore import *
from PyQt5.QtGui import *

# Python 2.x与Python 3.x在定义类时的区别,Python2.x中()内有object
# 形如 class UiForm(object):
class UiForm():
    """Build the fixed-size detection window and its input dialogs.

    ``setupUi`` places six push buttons in one horizontal row and connects
    each of them to the method that opens the matching dialog. Every dialog
    method returns what the user chose.
    """

    def setupUi(self, Form):
        """Create all widgets on ``Form`` and connect their signals.

        Args:
            Form: the top-level widget hosting the buttons. Its size is
                locked to 600x200 by setting equal minimum and maximum sizes.
        """
        Form.setObjectName("Form")
        Form.resize(600, 200)
        Form.setMinimumSize(QtCore.QSize(600, 200))
        Form.setMaximumSize(QtCore.QSize(600, 200))
        self.frame = QtWidgets.QFrame(Form)
        self.frame.setGeometry(QtCore.QRect(20, 20, 550, 100))
        self.frame.setFrameShape(QtWidgets.QFrame.StyledPanel)
        self.frame.setFrameShadow(QtWidgets.QFrame.Raised)
        self.frame.setObjectName("frame")
        self.horizontalLayout_2 = QtWidgets.QHBoxLayout(self.frame)
        self.horizontalLayout_2.setObjectName("horizontalLayout_2")

        # Buttons appear left to right in the order they are added here.
        # Load the frozen model (.pb) file.
        self.btn_add_file = self._add_button("btn_add_file")
        # Load the label map (.pbtxt) file.
        self.btn_add_pbtxt = self._add_button("btn_add_pbtxt")
        # Choose the folder that contains the images.
        self.btn_open_cam = self._add_button("btn_open_cam")
        # Enter the number of classes to detect.
        self.btn_enter = self._add_button("btn_enter")
        # Enter the number of images to process.
        self.btn_enter_img = self._add_button("btn_enter_img")
        # Quit.
        self.btn_exit = self._add_button("btn_exit")

        self.retranslateUi(Form)

        # Connect each button's clicked signal to the slot that opens the
        # corresponding dialog.
        self.btn_add_file.clicked.connect(self.openpb)
        self.btn_add_pbtxt.clicked.connect(self.openpbtxt)
        self.btn_open_cam.clicked.connect(self.opendir)
        self.btn_enter.clicked.connect(self.enter_num_cls)
        self.btn_enter_img.clicked.connect(self.enter_img)
        # The exit button closes the hosting window.
        self.btn_exit.clicked.connect(Form.close)
        QtCore.QMetaObject.connectSlotsByName(Form)

    def _add_button(self, object_name):
        """Create a push button in the frame, name it and append it to the row."""
        button = QtWidgets.QPushButton(self.frame)
        button.setObjectName(object_name)
        self.horizontalLayout_2.addWidget(button)
        return button

    def retranslateUi(self, Form):
        """Set the window title and all button captions."""
        _translate = QtCore.QCoreApplication.translate
        Form.setWindowTitle(_translate("Form", "目标检测"))
        self.btn_add_file.setText(_translate("Form", "加载模型文件"))
        self.btn_open_cam.setText(_translate("Form", "打开图片文件夹"))
        self.btn_add_pbtxt.setText(_translate("Form", "加载pbtxt文件"))
        self.btn_enter.setText(_translate("Form", "指定识别类别数"))
        self.btn_enter_img.setText(_translate("Form", "输入识别图片总张数"))
        self.btn_exit.setText(_translate("Form", "退出"))

    # Notes on the file dialogs used below:
    #   * QFileDialog.getOpenFileName opens a single file,
    #     QFileDialog.getOpenFileNames opens several files and
    #     QFileDialog.getExistingDirectory picks a folder.
    #   * getOpenFileName(parent, caption, dir, filter): parent widget,
    #     dialog title, directory shown initially, file-name filter.
    #   * getOpenFileName returns a (path, selected_filter) tuple, so the
    #     second element is discarded to keep only the path string.

    def openpb(self):
        """Ask for the frozen graph (.pb) file and return its path.

        Returns:
            The path string handed back by the file dialog.
        """
        openfile_name_pb, _ = QtWidgets.QFileDialog.getOpenFileName(
            self.btn_add_file, '选择pb文件', '/home/kanghao/', 'pb_files(*.pb)')
        return openfile_name_pb

    def opendir(self):
        """Ask for the folder holding the images and return its path."""
        opendir_name = QtWidgets.QFileDialog.getExistingDirectory(
            self.btn_open_cam, '选择识别图片文件夹', './')
        return opendir_name

    def openpbtxt(self):
        """Ask for the label map (.pbtxt) file and return its path."""
        openfile_name_pbtxt, _ = QtWidgets.QFileDialog.getOpenFileName(
            self.btn_add_pbtxt, '选择pbtxt文件', '/home/kanghao/',
            'pbtxt_files(*.pbtxt)')
        return openfile_name_pbtxt

    def enter_num_cls(self):
        """Ask how many classes should be detected.

        Returns:
            The entered integer (1..28), or None when the dialog was not
            confirmed.
        """
        # QInputDialog.getInt arguments: parent, title, label, initial
        # value, minimum, maximum, step.
        n, okPressed = QtWidgets.QInputDialog.getInt(
            self.btn_enter, '指定训练类别数', '你的目标有多少类?', 1, 1, 28, 1)
        if okPressed:
            return n
        return None

    def enter_img(self):
        """Ask how many images should be processed.

        Returns:
            The entered integer (1..9999) plus one, or None when the dialog
            was not confirmed. The script uses the value as the exclusive
            stop of ``range(1, ...)`` when it builds the image file names.
        """
        # QInputDialog.getInt arguments: parent, title, label, initial
        # value, minimum, maximum, step.
        m, okPressed = QtWidgets.QInputDialog.getInt(
            self.btn_enter_img, '输入识别图片总数(严格吻合!!)',
            '你的想识别多少张图片', 1, 1, 9999, 1)
        if okPressed:
            return m + 1
        return None
            
## 用于显示ui界面的命令
if __name__ == "__main__":
    # A QApplication must exist before any widget is created.
    app = QtWidgets.QApplication(sys.argv)
    MainWindow = QtWidgets.QMainWindow()
    # ui is the UiForm instance that builds the buttons on the main window.
    ui = UiForm()
    ui.setupUi(MainWindow)
    MainWindow.show()
    # The Qt event loop is not started here: app.exec_() is only called at
    # the very end of the script, after the detection code has run.
##--------------------------------------------------------------------##
# Add the parent directory to the module search path before the imports
# below run (presumably so the object_detection package can be found when
# the script is started from inside it - verify against the deployment layout).
sys.path.append("..")
from object_detection.utils import ops as utils_ops

# Refuse to continue with a TensorFlow release older than 1.9.0.
if StrictVersion(tf.__version__) < StrictVersion('1.9.0'):
	raise ImportError('Please upgrade your TensorFlow installation to v1.9.* or later!')

# NOTE(review): these two are imported from a top-level ``utils`` package,
# unlike ``object_detection.utils`` above - confirm both resolve to the same
# package in the intended working directory.
from utils import label_map_util

from utils import visualization_utils as vis_util

# Path to frozen detection graph. This is the actual model that is used for
# the object detection; the user picks it in a file dialog.
PATH_TO_FROZEN_GRAPH = ui.openpb()
if not PATH_TO_FROZEN_GRAPH:
    # A cancelled file dialog yields an empty path; stop with a clear message
    # instead of failing later when the file is opened.
    sys.exit('No frozen graph (.pb) file was selected.')

# Label map used to add the correct label for each box.
PATH_TO_LABELS = ui.openpbtxt()
if not PATH_TO_LABELS:
    sys.exit('No label map (.pbtxt) file was selected.')

# Number of classes entered by the user (None when the dialog was cancelled).
NUM_CLASSES = ui.enter_num_cls()

# Import the serialized GraphDef into a dedicated graph so that inference
# can later run against exactly this model.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
        serialized_graph = fid.read()
    od_graph_def.ParseFromString(serialized_graph)
    tf.import_graph_def(od_graph_def, name='')

# Lookup structure built from the label map; it is handed to the
# visualization helper when the boxes are drawn.
category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)

def load_image_into_numpy_array(image):
    """Convert a PIL image into a ``(height, width, 3)`` uint8 array.

    The image is converted to RGB first, so greyscale, palette, RGBA or CMYK
    files no longer break the three-channel layout expected downstream.

    Args:
        image: a ``PIL.Image.Image`` in any mode.

    Returns:
        A newly allocated numpy array of dtype uint8 with shape
        ``(image height, image width, 3)``.
    """
    # np.array() copies the pixel buffer directly, which avoids building a
    # Python sequence of per-pixel tuples via getdata().
    return np.array(image.convert('RGB'), dtype=np.uint8)
    
# The images to process must be named image1.jpg, image2.jpg, ... and live
# in the folder the user selects here.
PATH_TO_TEST_IMAGES_DIR = ui.opendir()
if not PATH_TO_TEST_IMAGES_DIR:
    # A cancelled folder dialog yields an empty path, which would silently
    # resolve the image names against the current working directory.
    sys.exit('No image folder was selected.')
# TODO: the way images are read still needs work:
#   1. do not require file names of the form imageN.jpg
#   2. read frames from a camera
# enter_img() returns the entered count plus one (the exclusive range stop),
# or None when the dialog was cancelled.
y = ui.enter_img()
if y is None:
    sys.exit('No image count was entered.')
TEST_IMAGE_PATHS = [
    os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i))
    for i in range(1, y)
]
print(TEST_IMAGE_PATHS)
# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)

def run_inference_for_single_image(image, graph):
    """Run the detection graph on one image and return its outputs.

    Args:
        image: image array. It is fed to ``image_tensor:0`` with a leading
            batch axis added; ``image.shape[0]`` and ``image.shape[1]`` are
            used as the target size when masks are reframed.
        graph: the graph that holds the imported detection model.

    Returns:
        A dict with the batch dimension stripped from every entry:
        'num_detections' (int), 'detection_classes' (uint8 array),
        'detection_boxes', 'detection_scores' and, when the model produces
        them, 'detection_masks'.
    """
    output_keys = (
        'num_detections', 'detection_boxes', 'detection_scores',
        'detection_classes', 'detection_masks',
    )
    with graph.as_default():
        with tf.Session() as sess:
            default_graph = tf.get_default_graph()
            # Names of all tensors produced by any operation in the graph.
            available_names = {
                tensor.name
                for operation in default_graph.get_operations()
                for tensor in operation.outputs
            }
            # Collect handles only for the outputs this model really has.
            tensor_dict = {
                key: default_graph.get_tensor_by_name(key + ':0')
                for key in output_keys
                if key + ':0' in available_names
            }
            if 'detection_masks' in tensor_dict:
                # The following processing is only for a single image:
                # drop the batch axis from boxes and masks.
                boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
                masks = tf.squeeze(tensor_dict['detection_masks'], [0])
                # Keep only the detections that are actually reported.
                valid_count = tf.cast(
                    tensor_dict['num_detections'][0], tf.int32)
                boxes = tf.slice(boxes, [0, 0], [valid_count, -1])
                masks = tf.slice(masks, [0, 0, 0], [valid_count, -1, -1])
                # Reframing translates the masks from box coordinates to
                # image coordinates and fits them to the image size.
                image_masks = utils_ops.reframe_box_masks_to_image_masks(
                    masks, boxes, image.shape[0], image.shape[1])
                binary_masks = tf.cast(
                    tf.greater(image_masks, 0.5), tf.uint8)
                # Follow the convention by adding back the batch dimension.
                tensor_dict['detection_masks'] = tf.expand_dims(
                    binary_masks, 0)
            image_tensor = default_graph.get_tensor_by_name('image_tensor:0')

            # Run inference on a batch that contains just this image.
            batch = np.expand_dims(image, 0)
            output_dict = sess.run(
                tensor_dict, feed_dict={image_tensor: batch})

            # All outputs are float32 numpy arrays, so convert types as
            # appropriate and strip the batch axis.
            output_dict['num_detections'] = int(
                output_dict['num_detections'][0])
            classes = output_dict['detection_classes'][0]
            output_dict['detection_classes'] = classes.astype(np.uint8)
            for key in ('detection_boxes', 'detection_scores'):
                output_dict[key] = output_dict[key][0]
            if 'detection_masks' in output_dict:
                output_dict['detection_masks'] = (
                    output_dict['detection_masks'][0])
    return output_dict
  
for image_path in TEST_IMAGE_PATHS:
    print(image_path)
    # Close the image file as soon as its pixels have been copied out.
    with Image.open(image_path) as image:
        # The array based representation of the image will be used later in
        # order to prepare the result image with boxes and labels on it.
        image_np = load_image_into_numpy_array(image)
    # Actual detection; the batch dimension is added inside the helper.
    output_dict = run_inference_for_single_image(image_np, detection_graph)
    # Visualization of the results of a detection.
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        output_dict['detection_boxes'],
        output_dict['detection_classes'],
        output_dict['detection_scores'],
        category_index,
        instance_masks=output_dict.get('detection_masks'),
        use_normalized_coordinates=True,
        line_thickness=8)
    fig = plt.figure(figsize=IMAGE_SIZE)
    plt.imshow(image_np)
    # The result is written next to the input as "<input name>.jpg".
    plt.savefig(str(image_path)+".jpg")
    # Release the figure; otherwise one figure per image stays in memory.
    plt.close(fig)
## End of the object detection code
##--------------------------------------------------------------------##

# Start the Qt event loop and exit with its return code.
sys.exit(app.exec_())

存在的问题都写在了程序的注释中,待进一步完善。

猜你喜欢

转载自blog.csdn.net/yourgreatfather/article/details/84583121