上一篇文章讲的是1.0怎么批量识别,这一篇则讲解2.0的实现细节
两个版本功能细分如下:
1.0 支持批量识别图片并输出csv文件
2.0 包含1.0所有功能,并支持选定识别区域,单个识别和批量识别功能,支持实时显示识别结果
下面是运行图
以下是需求说明
选择区域就是在图像上拖一个框,一张图片可能会需要识别多个,所以要支持拖动几个框
如果拖错了,就需要有删除框的功能
选完之后,需要测试一下是否能正确识别,有时候框拉得大了,识别效果可能不理想,则需要重新框一次
支持断点续识功能,运行到中途可以关闭,下次打开能继续上一次进度
以下是技术实现细节
图形界面是使用PyQt5实现的
显示图片使用的控件是QLabel,图片的选取框则是使用了鼠标的点击事件加绘图API来实现,具体代码如下
class ImgLabel(QLabel):
    """Image label on which the user drags rectangular selection boxes.

    Every finished box is kept in ``pointList`` as an ``(x0, y0, x1, y1)``
    tuple in label coordinates with ``x0 <= x1`` and ``y0 <= y1``.  The boxes
    are mirrored to ``./point.ini`` (keys ``pointCount`` and ``point<N>``) so
    that they are restored the next time the label is created.
    """

    def __init__(self, parent=None):
        super(ImgLabel, self).__init__(parent)
        # Corners of the box that is currently being dragged.
        self.x0 = 0
        self.y0 = 0
        self.x1 = 0
        self.y1 = 0
        self.flag = False  # True while a mouse button is held down
        # Deliberately not called ``move``: assigning a bool to ``self.move``
        # would shadow the inherited QWidget.move() method on this instance.
        self.moved = False  # True once the pressed mouse has been moved
        self.pointList = []
        self.pointCount = 0
        self.set = QSettings("./point.ini", QSettings.IniFormat)
        self.set.setIniCodec('UTF-8')
        self._loadPoints()
        self.update()

    @staticmethod
    def _normalized(x0, y0, x1, y1):
        """Return the box as (left, top, right, bottom), clamped to >= 0."""
        left, right = sorted((max(0, x0), max(0, x1)))
        top, bottom = sorted((max(0, y0), max(0, y1)))
        return (left, top, right, bottom)

    @staticmethod
    def _parseBox(raw):
        """Turn one stored ini value into a normalized box, or None if invalid."""
        # A value this class wrote comes back as one "a,b,c,d" string; a
        # hand-edited, unquoted ``point0=1,2,3,4`` comes back as a list.
        if isinstance(raw, str):
            raw = raw.split(",")
        if not isinstance(raw, (list, tuple)) or len(raw) != 4:
            return None
        try:
            x0, y0, x1, y1 = (int(v) for v in raw)
        except (TypeError, ValueError):
            return None
        return ImgLabel._normalized(x0, y0, x1, y1)

    def _loadPoints(self):
        """Restore the boxes saved in the ini file, skipping broken entries."""
        if not self.set.contains("pointCount"):
            return
        try:
            saved = int(self.set.value("pointCount"))
        except (TypeError, ValueError):
            saved = 0
        for idx in range(saved):
            box = self._parseBox(self.set.value("point" + str(idx)))
            if box is not None:
                self.pointList.append(box)
        self.pointCount = len(self.pointList)
        if self.pointCount != saved:
            # Some entries were dropped: rewrite the file so that the stored
            # indices match the in-memory list again.
            self._persist()

    def _persist(self):
        """Write the current box list and its length to the ini file."""
        self.pointCount = len(self.pointList)
        self.set.setValue("pointCount", self.pointCount)
        for idx, box in enumerate(self.pointList):
            self.set.setValue("point" + str(idx), ",".join(str(v) for v in box))

    # Mouse press: remember where the drag starts.
    def mousePressEvent(self, event):
        self.x0 = event.pos().x()
        self.y0 = event.pos().y()
        self.x1 = self.x0
        self.y1 = self.y0
        self.flag = True
        self.moved = False

    # Mouse move: track the opposite corner while a button is held down.
    def mouseMoveEvent(self, event):
        if not self.flag:
            return
        self.moved = True
        self.x1 = event.pos().x()
        self.y1 = event.pos().y()
        self.update()

    # Mouse release: store the finished box.
    def mouseReleaseEvent(self, event):
        dragged = self.flag and self.moved
        self.flag = False
        self.moved = False
        if dragged:
            point = self._normalized(self.x0, self.y0, self.x1, self.y1)
            # A box without area cannot be recognised, so it is not stored.
            if point[2] > point[0] and point[3] > point[1]:
                self.pointList.append(point)
                self._persist()
                print(*point)
        self.x0, self.y0, self.x1, self.y1 = (0, 0, 0, 0)
        self.update()

    def paintEvent(self, event):
        super().paintEvent(event)
        painter = QPainter(self)
        painter.setPen(QPen(Qt.red, 2, Qt.SolidLine))
        # Saved boxes stay visible at all times, also during a drag.
        for x0, y0, x1, y1 in self.pointList:
            painter.drawRect(QRect(x0, y0, abs(x1 - x0), abs(y1 - y0)))
        if self.flag and self.moved:
            # Box in progress; normalized so that dragging towards the
            # top-left is drawn where the mouse actually is.
            x0, y0, x1, y1 = self._normalized(self.x0, self.y0, self.x1, self.y1)
            painter.drawRect(QRect(x0, y0, x1 - x0, y1 - y0))
        painter.end()

    # Delete every box.
    def clearPoint(self):
        self.pointList = []
        self._persist()
        self.update()

    # Delete the most recently added box.
    def deletePoint(self):
        if len(self.pointList) > 0:
            self.pointList.pop()
            self._persist()
        for a in self.pointList:
            print(a)
        self.update()

    # Return all boxes as a list of (x0, y0, x1, y1).
    def getPointList(self):
        return self.pointList
通过继承QLabel,重载了它的鼠标点击事件,并将拖动得到的框保存到了point.ini里面,方便下一次启动读取,还可以手动修改框的位置,以支持更精细化的框选
点击选择文件后,通过文件选择器,选取文件,并将图像绘制到QLabel上
读取图片并显示图片用了opencv的方法,如果直接读的话,会有问题,具体什么问题不记得了,以下是代码
def selectFileProc(self):
    """Let the user pick one image file and display it.

    The chosen path becomes the only entry of ``self.fileList`` and is handed
    to ``self.showImg``.  When the dialog is cancelled nothing is changed.
    """
    path = QFileDialog.getOpenFileName(self, "", os.getcwd(), "")[0]
    if not path:
        # Cancelled dialog returns an empty path; keep the current selection
        # instead of replacing it with "".
        return
    print(path)
    self.fileList = [path]
    self.showImg(path)
def showImg(self, first):
    """Load the image at path ``first`` with OpenCV and show it in the label.

    Returns:
        ``(image, 1)`` with the array returned by ``cv2.imread`` on success,
        or ``(None, 0)`` when the file cannot be read as an image.
    """
    self.pathLabel.setText(first)
    try:
        Img = cv2.imread(first)  # read the picture through OpenCV
    except (cv2.error, TypeError):
        return None, 0
    # imread reports an unreadable file by returning None, not by raising.
    if Img is None or Img.ndim != 3:
        return None, 0
    image_height, image_width, image_depth = Img.shape
    # Separate names keep the RGB array alive while the QImage wraps its
    # buffer; QPixmap.fromImage then makes its own copy.
    rgb = cv2.cvtColor(Img, cv2.COLOR_BGR2RGB)
    qimage = QImage(rgb.data, image_width, image_height,
                    image_width * image_depth, QImage.Format_RGB888)
    self.imgLabel.setPixmap(QPixmap.fromImage(qimage))
    self.imgLabel.update()
    return Img, 1
单次运行代码
def test(self, testPath=None):
    """Recognise the text inside every selection box of one image.

    Args:
        testPath: image path; when empty, the first entry of
            ``self.fileList`` is used.

    Returns:
        All recognised texts, each followed by a comma.  The same string is
        shown in ``self.pointLabel``.  It is empty when there is no image,
        the image cannot be read, or nothing was recognised.
    """
    if not testPath:
        if not self.fileList:
            self.pointLabel.setText("")
            return ""
        testPath = self.fileList[0]
    # Paths that are not .jpg are only logged, not rejected.
    if not testPath.endswith(".jpg"):
        print(testPath)
    pan = cv2.imread(testPath)
    if pan is None:
        # imread signals an unreadable file by returning None.
        self.pointLabel.setText("")
        return ""
    pieces = []
    # Recognise the picture inside each selection box.
    for point in self.imgLabel.getPointList():
        x0, y0, x1, y1 = point
        # Order and clamp the corners: a reversed box would slice to nothing
        # and a negative index would wrap around to the other image edge.
        left, right = sorted((max(0, x0), max(0, x1)))
        top, bottom = sorted((max(0, y0), max(0, y1)))
        crop = pan[top:bottom, left:right]
        if crop.size == 0:
            continue
        try:
            results = reader.readtext(crop)
        except Exception as exc:
            # Best effort per box: report the failure and go on with the rest.
            print("recognition failed for box", point, exc)
            continue
        print(results)
        # Collect the text field of every result entry.
        for result in results:
            pieces.append(result[1] if len(result) > 2 else "")
    text = "".join(piece + "," for piece in pieces)
    self.pointLabel.setText(text)
    return text
批量识别代码
def run(self):
    """Recognise every file in ``self.fileList`` and write ``result.csv``.

    Each recognised text is also stored in ``result/<name>.result``.  A file
    that already has such a result file is not recognised again, which lets
    an interrupted batch continue where it stopped.  Every non-empty result
    is appended to the CSV file and to ``self.table``.
    """
    total = len(self.fileList)
    # The per-file results live in this directory; create it on first use.
    os.makedirs("result", exist_ok=True)
    self.table.clear()
    with open("result.csv", "w") as fd:
        for i, path in enumerate(self.fileList):
            # Progress display.
            self.processLabel.setText("%s/%s" % (i, total))
            # The progress bar takes an int, not a float.
            self.process.setValue(int(i / total * 100))
            name = os.path.basename(path)
            cache = "result/" + name + ".result"
            text = ""
            if os.path.exists(cache):
                # Resume: reuse the result saved by an earlier run.
                with open(cache, "r") as saved:
                    text = name + "," + saved.read()
            else:
                _, ret = self.showImg(path)
                if ret:
                    text = self.test(path)
                    # Save each result to its own file right away so the
                    # batch can be resumed later.
                    if text != "":
                        with open(cache, "w") as saved:
                            saved.write(text)
                        text = name + "," + text
            if text != "":
                fd.write(text + "\n")
                self.table.addData(name, text)
            # This runs in the GUI thread (no worker thread), so let Qt
            # process pending events or the window appears frozen.
            QApplication.processEvents(QEventLoop.AllEvents, 1)
    if total:
        # Show the finished state; inside the loop the counter stops at N-1.
        self.processLabel.setText("%s/%s" % (total, total))
        self.process.setValue(100)
    print("run")
识别结果分两部分,一个是csv文件,一个是展示在table里面
如果表格里面有些记录不太好,则可以右键删除,下次运行时就可以重新识别
以下是代码
class ResultTable(QTableView):
    """Two-column table (file name, recognised data) with a delete menu.

    Deleting a row through the context menu also removes the matching
    ``result/<fileName>.result`` file, so that file is recognised again on
    the next batch run.
    """

    def __init__(self, parent=None):
        super(ResultTable, self).__init__(parent)
        self.setContextMenuPolicy(Qt.CustomContextMenu)
        self.customContextMenuRequested.connect(self.contextMenuRequested)
        self.horizontalHeader().setSectionResizeMode(QHeaderView.ResizeToContents)
        self.currentRow = -1  # row the context menu was opened for
        header = ["fileName", "data"]
        # NOTE(review): this attribute hides the inherited model() method on
        # the instance; the name is kept because outside code may rely on it.
        self.model = QStandardItemModel()
        self.model.setHorizontalHeaderLabels(header)
        self.setModel(self.model)

    def addData(self, name, data):
        """Append one row holding ``name`` and ``data``."""
        count = self.model.rowCount()
        self.model.setItem(count, 0, QStandardItem(name))
        self.model.setItem(count, 1, QStandardItem(data))

    def contextMenuRequested(self, point):
        """Show the delete menu for the row under ``point``."""
        # Prefer the row under the cursor; the first selected index may be a
        # different row when several rows are selected.
        index = self.indexAt(point)
        if index.isValid():
            row_num = index.row()
        else:
            selected = self.selectionModel().selection().indexes()
            row_num = selected[0].row() if selected else -1
        self.currentRow = row_num
        if row_num < 0:
            # No row to act on, so a delete menu would do nothing.
            return
        delAction = QAction("删除", self, triggered=self.delRow)
        popMenu = QMenu(self)
        popMenu.addAction(delAction)
        popMenu.exec_(QCursor.pos())

    def clear(self):
        """Remove every row; the header labels stay."""
        self.model.setRowCount(0)

    def delRow(self):
        """Delete the remembered row and its saved result file."""
        row = self.currentRow
        # Forget the row at once so a later call cannot delete a neighbour.
        self.currentRow = -1
        if row < 0 or row >= self.model.rowCount():
            return
        modelIndex = self.model.index(row, 0)
        item = self.model.data(modelIndex)
        if isinstance(item, str):
            path = "result/" + item + ".result"
            if os.path.exists(path):
                os.remove(path)
        self.model.removeRow(row)
if __name__ == '__main__':
    # Manual smoke test: fill the table, empty it again, then show the window.
    app = QApplication(sys.argv)
    view = ResultTable()
    for demo_name in ("aa1", "aa2", "aa3", "aa4", "aa5"):
        view.addData(demo_name, "bbbb")
    view.clear()
    view.show()
    app.exec_()