import xlrd
from docx import Document
from docx.shared import Inches
import os
import re
# ---------------------------------------------------------------------------
# Locations on disk. Every path below is derived from ``base_path``.
# ---------------------------------------------------------------------------
# Root working folder.
base_path = "C:\\Users\\IBM\\Desktop\\xxx\\"
# Excel lookup table that drives the export.
xls_path = f"{base_path}对照表.xlsx"
# Word template that gets filled in for every Excel row.
template_doc_path = f"{base_path}xxxx地块调查表.docx"
# Folder that receives the generated Word files.
dist_doc_path = f"{base_path}dist_doc\\"
# Folder containing the photos.
photo_path = f"{base_path}照片\\"
# Folder containing the imagery pictures.
img_pos_path = f"{base_path}影像\\"

# Regular expression that extracts the code from a photo file name;
# adapt it to the actual naming scheme of the photos.
img_code_pattern = r"(.*?)\s*\(\d\).*|(.*?)\..*"

# ---------------------------------------------------------------------------
# Excel column indexes (all 0-based).
# ---------------------------------------------------------------------------
# Column supplying the first value written into the Word form.
word_value1_xls_colnum = 2
# Column supplying the second value written into the Word form.
word_value2_xls_colnum = 4
# Column supplying the third value written into the Word form.
word_value3_xls_colnum = 5
# Column holding the photo code.
photo_code_xls_colnum = 3
def delete_paragraph(paragraph):
    """Remove *paragraph* from the document it belongs to.

    Used to wipe the placeholder text that the Word template keeps in the
    cell where the photos are inserted.

    Args:
        paragraph: A paragraph proxy exposing the underlying XML element as
            ``_element`` (and ``_p``).

    After the call the proxy no longer references any element, so it must
    not be used again.
    """
    element = paragraph._element
    parent = element.getparent()
    # An element that is already detached has no parent; nothing to remove.
    if parent is not None:
        parent.remove(element)
    # Invalidate the proxy itself (not the XML element) so that stale use of
    # the deleted paragraph fails loudly instead of editing an orphan node.
    paragraph._p = paragraph._element = None
def get_img_paths(base_dir, code, *, pattern=None):
    """Return the paths of every file in *base_dir* whose name carries *code*.

    Each file name is matched against ``pattern``; the code of a file is the
    first capture group that took part in the match. (The default pattern
    has two alternatives with one group each, so the code may land in either
    group.)

    Args:
        base_dir: Folder that holds the photos.
        code: Code taken from the Excel sheet. It may be a string or a
            number; integer-valued floats such as ``12.0`` are compared as
            ``"12"``.
        pattern: Regular expression used to extract the code from a file
            name. Defaults to the module-level ``img_code_pattern``.

    Returns:
        A list of matching file paths, ordered by file name.
    """
    import os
    import re

    if pattern is None:
        pattern = img_code_pattern
    # Codes read from Excel may be numeric; a whole number stored as a float
    # would otherwise be rendered as "12.0" and never equal a file-name code.
    if isinstance(code, float) and code.is_integer():
        code = int(code)
    code = str(code)

    match_name = re.compile(pattern).match
    paths = []
    # Sorted so the photos are always returned in the same order.
    for image_name in sorted(os.listdir(base_dir)):
        match_obj = match_name(image_name)
        if match_obj is None:
            continue
        # Take whichever group actually captured; reading group(1) alone
        # misses names matched by a later alternative of the pattern.
        img_code = next(
            (group for group in match_obj.groups() if group is not None),
            None,
        )
        if img_code == code:
            paths.append(os.path.join(base_dir, image_name))
    return paths
def _fill_photo_cell(photo_cell, image_paths):
    """Replace the content of *photo_cell* with one picture per path."""
    # Drop the placeholder paragraphs that ship with the template.
    for placeholder in photo_cell.paragraphs:
        delete_paragraph(placeholder)
    for image_path in image_paths:
        run = photo_cell.add_paragraph().add_run()
        run.add_picture(image_path, width=Inches(4.3))
    # Never leave the cell without any paragraph when no photo was found.
    if not photo_cell.paragraphs:
        photo_cell.add_paragraph()


def main():
    """Generate one Word form per data row of the Excel sheet.

    Reads the first sheet of ``xls_path`` (row 0 is the header), fills a
    fresh copy of the template ``template_doc_path`` for every following
    row, inserts the photos whose file name carries the row's photo code,
    and saves the result as ``<row index>.docx`` inside ``dist_doc_path``.
    """
    import os

    workbook = xlrd.open_workbook(xls_path)
    data_sheet = workbook.sheet_by_index(0)

    # Make sure the output folder exists before the first save.
    os.makedirs(dist_doc_path, exist_ok=True)

    # Town and village are remembered across rows: a row with a blank
    # town/village cell reuses the last non-blank value seen above it.
    town_name = ''
    vilage_name = ''

    # Start at row 1; row 0 is the header.
    for i in range(1, data_sheet.nrows):
        row_values = data_sheet.row_values(i)

        # str() guards against non-text cells, which have no .strip().
        if str(row_values[0]).strip() != "":
            town_name = row_values[0]
        if str(row_values[1]).strip() != "":
            vilage_name = row_values[1]
        # Write the carried-forward names back so the configurable column
        # lookups below also see them when they point at column 0 or 1.
        row_values[0] = town_name
        row_values[1] = vilage_name

        # Load a fresh copy of the template for every row.
        template_doc = Document(template_doc_path)
        # Blank out the heading paragraph of the template.
        template_doc.paragraphs[0].clear()

        table = template_doc.tables[0]
        row0 = table.rows[0]
        row1 = table.rows[1]
        row2 = table.rows[2]

        # Fill the value cells of the form from the configured columns.
        row0.cells[1].text = str(row_values[word_value1_xls_colnum])
        row0.cells[3].text = str(vilage_name)
        row1.cells[1].text = str(row_values[word_value2_xls_colnum])
        row1.cells[3].text = str(row_values[word_value3_xls_colnum])

        # Insert the photos belonging to this row.
        _fill_photo_cell(
            row2.cells[1],
            get_img_paths(photo_path, row_values[photo_code_xls_colnum]),
        )
        # NOTE: filling table row 3 with pictures from ``img_pos_path`` was
        # disabled in the original script and is intentionally not done here.

        # Save the filled-in document, named after the Excel row index.
        template_doc.save(os.path.join(dist_doc_path, str(i) + ".docx"))
# Run the export only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
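# Batch export to Word
# (blog page residue: "You may also like")
# Reposted from blog.csdn.net/qq_36808376/article/details/89335219
# (blog page residue: "Today's recommendations")
# (blog page residue: "Weekly ranking")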