Running the darknet-ocr text-detection stage (darknet_ocr) on its own

I run chineseocr (yolov3 + crnn) with the detection stage split into a separate component (darknet_detect), because of a CUDA version problem:
OSError: libcudart.so.9.2: cannot open shared object file: No such file or directory
So instead I used the standalone darknet_ocr detection part; source link: [add a link description here]

The linked repository contains the darknet-framework text-detection script text.py in the dnn directory. Because I needed extra output and ran into a few problems, I made some modifications; the full set of code follows for reference.

import cv2
import numpy as np
import time
from config import textPath, anchors
from helper.image import resize_img, get_origin_box, soft_max, reshape
from helper.detectors import TextDetector
from config import scale, maxScale, TEXT_LINE_SCORE
from dnn.image import rotate_cut_img, sort_box
from PIL import Image
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt


# Load the darknet text-detection network once at import time.
# The .cfg path is derived from the weights path by substituting
# 'weights' -> 'cfg' (assumes the two files share a naming scheme).
textNet   =  cv2.dnn.readNetFromDarknet(textPath.replace('weights','cfg'),textPath)

def detect_box(image, scale=600, maxScale=900):
    """Run the darknet text network on one image and return raw proposals.

    Args:
        image: input image as a numpy array (H, W, 3), BGR channel order
            (OpenCV convention — TODO confirm against caller).
        scale: target size for the shorter side when resizing.
        maxScale: upper bound for the longer side when resizing.

    Returns:
        Tuple ``(scores, boxes, rate, w, h)`` where ``scores`` are the
        softmaxed per-proposal class scores, ``boxes`` are proposal boxes
        clamped to the resized image, ``rate`` is the resize factor, and
        ``(w, h)`` is the resized image size.
    """
    image, rate = resize_img(image, scale, maxScale=maxScale)
    h, w = image.shape[:2]
    inputBlob = cv2.dnn.blobFromImage(image, scalefactor=1.0, size=(w, h), swapRB=False, crop=False)
    outputName = textNet.getUnconnectedOutLayersNames()
    textNet.setInput(inputBlob)
    out = textNet.forward(outputName)[0]
    # The first 20 output channels hold class scores; the rest hold box
    # regression values (layout defined by the darknet cfg — TODO confirm).
    clsOut = reshape(out[:, :20, ...])
    boxOut = reshape(out[:, 20:, ...])
    boxes = get_origin_box((w, h), anchors, boxOut[0])
    scores = soft_max(clsOut[0])
    # Clamp proposal coordinates into the resized-image bounds:
    # x-coordinates (cols 0, 2) to [0, w-1], y-coordinates (cols 1, 3) to [0, h-1].
    boxes[:, 0:4][boxes[:, 0:4] < 0] = 0
    boxes[:, 0][boxes[:, 0] >= w] = w - 1
    boxes[:, 1][boxes[:, 1] >= h] = h - 1
    boxes[:, 2][boxes[:, 2] >= w] = w - 1
    boxes[:, 3][boxes[:, 3] >= h] = h - 1
    return scores, boxes, rate, w, h
# NOTE(review): this timestamp is taken at import time, between function
# definitions — paired with the elapsed-time print further below it measures
# only the cost of defining detect_lines, not an actual detection run.
# Consider timing the detect() call instead.
timeTake = time.time()

def detect_lines(image, scale=600,
                 maxScale=900,
                 MAX_HORIZONTAL_GAP=30,
                 MIN_V_OVERLAPS=0.6,
                 MIN_SIZE_SIM=0.6,
                 TEXT_PROPOSALS_MIN_SCORE=0.7,
                 TEXT_PROPOSALS_NMS_THRESH=0.3,
                 TEXT_LINE_NMS_THRESH=0.9,
                 TEXT_LINE_SCORE=0.9
                 ):
    """Detect text lines by grouping darknet proposals into line boxes.

    Args:
        image: input image as a numpy array (H, W, 3).
        scale, maxScale: resize bounds forwarded to detect_box.
        MAX_HORIZONTAL_GAP: max horizontal gap (px) when linking proposals
            into a line; a floor of 16 is enforced below.
        MIN_V_OVERLAPS, MIN_SIZE_SIM: vertical-overlap / size-similarity
            thresholds for linking proposals.
        TEXT_PROPOSALS_MIN_SCORE, TEXT_PROPOSALS_NMS_THRESH,
        TEXT_LINE_NMS_THRESH, TEXT_LINE_SCORE: score/NMS thresholds passed
            through to TextDetector.detect. NOTE: the TEXT_LINE_SCORE
            parameter shadows the value imported from config at file level.

    Returns:
        Tuple ``(text_lines, scores)`` with line boxes mapped back to the
        original (pre-resize) image coordinates.
    """
    # Enforce a lower bound of 16 px on the linking gap.
    MAX_HORIZONTAL_GAP = max(16, MAX_HORIZONTAL_GAP)
    detectors = TextDetector(MAX_HORIZONTAL_GAP, MIN_V_OVERLAPS, MIN_SIZE_SIM)
    scores, boxes, rate, w, h = detect_box(image, scale, maxScale)
    size = (h, w)
    text_lines, scores = detectors.detect(boxes, scores, size,
                                          TEXT_PROPOSALS_MIN_SCORE, TEXT_PROPOSALS_NMS_THRESH,
                                          TEXT_LINE_NMS_THRESH, TEXT_LINE_SCORE)
    # Undo the resize so line coordinates refer to the original image.
    if len(text_lines) > 0:
        text_lines = text_lines / rate
    return text_lines, scores

# NOTE(review): this only measures the time since the module-level timestamp
# above, i.e. the cost of *defining* detect_lines — it does not time any
# actual detection. The printed value is not meaningful as-is.
timeTake = time.time()-timeTake
print('It take:{}s'.format(timeTake))


def detect(img):
    """Detect text lines in an RGB image and return a JSON-style result.

    Args:
        img: RGB image as a numpy array (H, W, 3).

    Returns:
        Dict with key 'data' (a list of ``{'box', 'prob', 'text'}`` entries
        whose score exceeds the config-level TEXT_LINE_SCORE; 'text' is left
        as None for a later recognition stage) and 'errCode' (0 on success).
    """
    # The network pipeline expects BGR channel order (OpenCV convention).
    image = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    boxes, scores = detect_lines(image, scale=scale, maxScale=maxScale)
    # Keep only confident lines; round the probability for display.
    data = [
        {'box': [int(x) for x in box], 'prob': round(float(score), 2), 'text': None}
        for box, score in zip(boxes, scores)
        if score > TEXT_LINE_SCORE
    ]
    return {'data': data, 'errCode': 0}

def ocr_batch(img, boxes, leftAdjustAlph=0.01, rightAdjustAlph=0.01):
    """Crop (and deskew) each detected text box out of the image for OCR.

    Args:
        img: RGB image as a numpy array.
        boxes: iterable of text-line boxes in the format rotate_cut_img accepts.
        leftAdjustAlph: fractional left-edge padding applied when cropping.
        rightAdjustAlph: fractional right-edge padding applied when cropping.

    Returns:
        List of box dicts, each augmented with an 'img' key holding the
        grayscale PIL crop ready for the recognition stage.
    """
    im = Image.fromarray(img)
    newBoxes = []
    for box in boxes:
        partImg, box = rotate_cut_img(im, box, leftAdjustAlph, rightAdjustAlph)
        # Convert the crop to single-channel grayscale for the recognizer.
        box['img'] = partImg.convert('L')
        newBoxes.append(box)
    # Bug fix: the original built newBoxes but never returned it (the return
    # was commented out), and left a blocking cv2.waitKey(0) with no window
    # plus unused color-conversion work in the loop.
    return newBoxes

def drawDetectBox(img, resJson):
    """Draw each detected quadrilateral from resJson onto img in green.

    Args:
        img: image (numpy array) to draw on, modified in place.
        resJson: detection result dict as produced by detect(), whose
            'data' entries each carry a flat 8-value 'box'
            [x1, y1, x2, y2, x3, y3, x4, y4].
    """
    green = (0, 255, 0)
    for item in resJson['data']:
        coords = [int(v) for v in item['box']]
        # Group the flat coordinate list into four corner points.
        corners = [(coords[i], coords[i + 1]) for i in range(0, 8, 2)]
        # Connect consecutive corners and close the quadrilateral.
        for start, end in zip(corners, corners[1:] + corners[:1]):
            cv2.line(img, start, end, green)



def show_img(imgs: np.ndarray, color=True):
    """Queue one matplotlib figure per image; call plt.show() to display.

    Args:
        imgs: either a batch of images or a single image. A single image
            (H, W, 3) when color, (H, W) when grayscale) is promoted to a
            batch of one before iteration.
        color: whether to render in color (True) or grayscale (False).
    """
    single_color = imgs.ndim == 3 and color
    single_gray = imgs.ndim == 2 and not color
    if single_color or single_gray:
        imgs = imgs[np.newaxis, ...]
    cmap = None if color else 'gray'
    for frame in imgs:
        plt.figure()
        plt.imshow(frame, cmap=cmap)

# --- Demo driver: detect text in a sample image, crop the lines, draw the
# boxes, and save the annotated result. ---
imgDir = './test/'
img = cv2.imread(imgDir + 'img.jpeg')
res = detect(img)
print(res)
# Collect the raw box coordinates for the cropping stage
# (replaces the original manual index/append loop).
boxes = [item['box'] for item in res['data']]
ocr_batch(img, boxes)
drawDetectBox(img, res)
cv2.imwrite('detect7.jpg', img)
# With the Agg backend selected at the top of the file, plt.show() renders
# nothing on screen; the saved detect7.jpg is the actual output. Kept to
# mirror the original headless workflow.
show_img(img, color=True)
plt.show()


Note: the code originally used cv2.imshow(), but I run it on a Linux system without a display, so it raised "cannot connect to X server"; those calls had to be commented out and replaced with plt.show().
Run after the results obtained:
Here Insert Picture Description
Here Insert Picture Description
To compare with the previous article, which ran the full pipeline on the same image, this text.py file is run with:

docker run -v /.../OCR-DARKNET/darknet-ocr:/chineseocr/darknet-ocr -w /chineseocr/darknet-ocr chineseocr:v2 python text.py

I have changed text.py into several versions to meet different needs — if you want to discuss any of them, or your OpenCV library or environment behaves differently, or you find any discrepancy or needed update, please feel free to reach out!

Released six original articles · won praise 60 · views 4520

Guess you like

Origin blog.csdn.net/csdnqq970820/article/details/104961172