# Batch export of Word documents (批量导出word)

import xlrd
from docx import Document
from docx.shared import Inches
import os
import re

# Base folder; every other path below is built by appending to it.
base_path = "C:\\Users\\IBM\\Desktop\\xxx\\"
# Path of the Excel workbook.
xls_path = base_path + "对照表.xlsx"
# Path of the Word template that gets filled in.
template_doc_path = base_path + "xxxx地块调查表.docx"
# Folder where the generated documents are stored.
dist_doc_path = base_path + "dist_doc\\"
# Folder containing the photos.
photo_path = base_path + "照片\\"
# Folder containing the imagery.
img_pos_path = base_path + "影像\\"
# Regular expression that extracts the code from a photo file name; adapt it
# to the actual naming scheme. The first alternative captures (group 1) the
# text before a single digit in parentheses, e.g. "(1)"; the second captures
# (group 2) the text before the first "." in the name.
img_code_pattern = r"(.*?)\s*\(\d\).*|(.*?)\..*"
# Zero-based Excel column holding the first value written into the Word table.
word_value1_xls_colnum = 2
# Zero-based Excel column holding the second value written into the Word table.
word_value2_xls_colnum = 4
# Zero-based Excel column holding the third value written into the Word table.
word_value3_xls_colnum = 5
# Zero-based Excel column holding the photo code.
photo_code_xls_colnum = 3



def delete_paragraph(paragraph):
    """Remove *paragraph* from the document it belongs to.

    Used to clear the placeholder text that the Word template keeps in the
    table cell reserved for photos.

    Args:
        paragraph: A paragraph proxy object exposing ``_element`` (the
            underlying XML element) and ``_p``.
    """
    element = paragraph._element
    element.getparent().remove(element)
    # Invalidate the proxy itself (not the detached XML element) so that any
    # later use of the stale paragraph object fails immediately.
    paragraph._p = paragraph._element = None


def get_img_paths(base_dir, code, pattern=None):
    """Return the paths of all images in *base_dir* whose name encodes *code*.

    Args:
        base_dir: Folder containing the image files.
        code: Code taken from the Excel sheet. It may be a number; a
            whole-number float such as ``7.0`` is compared as ``"7"``.
        pattern: Regular expression applied with ``re.match`` to each file
            name. The first capture group that took part in the match is
            used as the file's code. Defaults to the module-level
            ``img_code_pattern``.

    Returns:
        A list of matching file paths, ordered by file name (empty when
        nothing matches).
    """
    if pattern is None:
        pattern = img_code_pattern
    # Numeric Excel cells can arrive as floats; str(7.0) would give "7.0",
    # which never equals a code extracted from a file name like "7.jpg".
    if isinstance(code, float) and code.is_integer():
        code = int(code)
    code = str(code)

    regex = re.compile(pattern)
    paths = []
    # os.listdir order is unspecified; sort so the photo order is stable.
    for image_name in sorted(os.listdir(base_dir)):
        match_obj = regex.match(image_name)
        if match_obj is None:
            continue
        # The pattern may consist of several alternatives, each with its own
        # capture group; only the alternative that matched has a non-None
        # group, so take the first one that is set.
        img_code = next(
            (group for group in match_obj.groups() if group is not None),
            None,
        )
        if img_code == code:
            # join() works whether or not base_dir ends with a separator.
            paths.append(os.path.join(base_dir, image_name))
    return paths
    

def _fill_picture_cell(cell, img_paths):
    """Replace the content of *cell* with one picture per path in *img_paths*."""
    # Drop the placeholder paragraphs that come with the template.
    for paragraph in cell.paragraphs:
        delete_paragraph(paragraph)
    for img_path in img_paths:
        run = cell.add_paragraph().add_run()
        run.add_picture(img_path, width=Inches(4.3))
    # Keep at least one (empty) paragraph in the cell when no picture was
    # added, as the original script did.
    if not cell.paragraphs:
        cell.add_paragraph()


def main():
    """Generate one Word document per Excel data row from the template.

    Reads the first sheet of ``xls_path`` (row 0 is treated as the header),
    fills the first table of ``template_doc_path`` with the configured
    columns, inserts every photo whose file-name code matches the row's
    photo code, and saves the result as ``<row index>.docx`` inside
    ``dist_doc_path``.
    """
    # NOTE(review): xlrd 2.0+ reads only legacy .xls files; opening the
    # configured .xlsx needs xlrd < 2.0 -- confirm the installed version.
    workbook = xlrd.open_workbook(xls_path)
    data_sheet = workbook.sheet_by_index(0)

    # Create the output folder up front so saving does not fail on a
    # missing directory.
    os.makedirs(dist_doc_path, exist_ok=True)

    town_name = ''
    village_name = ''

    # Row 0 is the header, so data starts at row 1.
    for i in range(1, data_sheet.nrows):
        row_values = data_sheet.row_values(i)

        # Blank town/village cells inherit the last non-blank value seen in
        # the rows above. str() guards against non-string cells, which have
        # no .strip().
        if str(row_values[0]).strip() != "":
            town_name = row_values[0]
        if str(row_values[1]).strip() != "":
            village_name = row_values[1]
        row_values[0] = town_name
        row_values[1] = village_name

        # Load a fresh copy of the template for every row.
        template_doc = Document(template_doc_path)
        # Clear the heading paragraph of the template.
        template_doc.paragraphs[0].clear()

        table = template_doc.tables[0]
        row0 = table.rows[0]
        row1 = table.rows[1]
        row2 = table.rows[2]

        # Fill the text cells from the configured Excel columns.
        row0.cells[1].text = str(row_values[word_value1_xls_colnum])
        row0.cells[3].text = str(village_name)
        row1.cells[1].text = str(row_values[word_value2_xls_colnum])
        row1.cells[3].text = str(row_values[word_value3_xls_colnum])

        # Insert the photos that belong to this row.
        photo_imgs = get_img_paths(photo_path, row_values[photo_code_xls_colnum])
        _fill_picture_cell(row2.cells[1], photo_imgs)

        # The location-imagery row (table row 3, images looked up in
        # img_pos_path by Excel column 2) was disabled in the original
        # script; to restore it, call _fill_picture_cell on
        # table.rows[3].cells[1] with the matching image paths.

        # Save the filled-in document, named after the Excel row index.
        template_doc.save(dist_doc_path + str(i) + ".docx")



# Run the export only when this file is executed as a script.
if __name__ == "__main__":
    main()
# (Scraped page footer "猜你喜欢" / "You may also like" -- not part of the script.)