# -*- coding: utf-8 -*-
import base64
import json
import os
import urllib.error
import urllib.request
ENCODING = 'utf-8'
def get_img_base64(img_file):
    """Return the contents of *img_file* as a base64-encoded text string.

    Args:
        img_file: Path of the file to read; it is opened in binary mode.

    Returns:
        A ``str`` holding the base64 encoding of the file's bytes
        (an empty string for an empty file).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(img_file, 'rb') as infile:
        raw = infile.read()
    # base64 output only ever contains ASCII characters, so decoding as
    # ASCII yields the same text as decoding as UTF-8.
    return base64.b64encode(raw).decode('ascii')
def predict(url, appcode, img_base64, kv_configure):
    """POST a base64-encoded image to *url* and return the raw response.

    The request body is a JSON object with an ``image`` field and, when
    *kv_configure* is not ``None``, a ``configure`` field holding
    *kv_configure* serialized as a JSON string (JSON nested in JSON).

    Args:
        url: Endpoint to send the request to.
        appcode: Credential placed in the ``Authorization: APPCODE ...`` header.
        img_base64: Base64 text of the image to send.
        kv_configure: JSON-serializable options, or ``None`` to omit the
            ``configure`` field entirely.

    Returns:
        A ``(status_code, headers, body_bytes)`` tuple. HTTP error responses
        are returned in the same shape instead of being raised.

    Raises:
        urllib.error.URLError: For failures that are not HTTP error
            responses; these are not caught here.
    """
    param = {'image': img_base64}
    if kv_configure is not None:
        param['configure'] = json.dumps(kv_configure)
    data = json.dumps(param).encode('utf-8')

    headers = {
        'Authorization': 'APPCODE %s' % appcode,
        'Content-Type': 'application/json; charset=UTF-8',
    }
    request = urllib.request.Request(url=url, headers=headers, data=data)

    try:
        # The context manager closes the connection once the body is read.
        with urllib.request.urlopen(request, timeout=10) as response:
            return response.code, response.headers, response.read()
    except urllib.error.HTTPError as err:
        # An HTTPError doubles as a response object; read it, then close it
        # so the underlying socket is released.
        try:
            return err.code, err.headers, err.read()
        finally:
            err.close()
def demo(img_file, output_format='xlsx'):
    """Send *img_file* to the table-parse OCR endpoint and print the result.

    The image is base64-encoded, posted through ``predict`` and the response
    body is printed. When *output_format* is ``'xlsx'`` the ``tables`` field
    of the JSON response is base64-decoded and written next to the image,
    with the image's extension replaced by ``.xlsx``.

    Options the original notes list for the ``configure`` payload:
        1. format: output format, one of html / json / xlsx.
        2. dir_assure: whether the image is known to be upright,
           true (known) / false (not known).
        3. line_less: whether the table has no ruling lines, true (no lines,
           or only horizontal lines without vertical ones) / false (has lines).
        4. skip_detection: whether to skip detection; if no table is
           detected, ``"skip_detection": true`` can be set.

    Args:
        img_file: Path of the image to recognize.
        output_format: Value sent as the ``format`` option. This is an
            explicit parameter so the function does not depend on a module
            global named ``format``, which is the builtin function unless
            the script's entry point has rebound it.

    Raises:
        SystemExit: With status 1 when the service does not answer HTTP 200
            (the diagnostics are printed first).
    """
    appcode = '***************************'
    url = "https://form.market.alicloudapi.com/api/predict/ocr_table_parse"

    # NOTE(review): 'side' is not among the options listed in the notes
    # above; it is kept because the original request sent it. Confirm the
    # endpoint accepts it.
    configure = {'format': output_format, 'side': 'face'}

    img_base64data = get_img_base64(img_file)
    stat, header, content = predict(url, appcode, img_base64data, configure)

    if stat != 200:
        print('Http status code: ', stat)
        print('Error msg in header: ', header.get('x-ca-error-message', ''))
        print('Error msg in body: ', content)
        # Non-zero status so callers and shells can tell the run failed.
        raise SystemExit(1)

    if output_format == 'xlsx':
        res_obj = json.loads(content)
        # splitext only strips an extension from the final path component,
        # so directories containing dots are left intact.
        xlsx_path = os.path.splitext(img_file)[0] + '.xlsx'
        with open(xlsx_path, 'wb') as fout:
            fout.write(base64.b64decode(res_obj['tables']))
        print('已生成表格!')

    print(content.decode(ENCODING))
# Script entry point: runs the demo on a placeholder image path.
if __name__ == '__main__':
    # Module-level output-format setting for this script run.
    # NOTE: this name shadows the builtin format() for the whole module.
    format = 'xlsx'
    demo('***.png')
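# Alibaba Cloud OCR table-recognition code.
# (Scraped page footer) You may also like
# Reposted from blog.csdn.net/qq_27900321/article/details/129813248
# (Scraped page footer) Today's recommendations
# (Scraped page footer) Weekly ranking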