用python裁切PDF文件中的图片

想把 PDF 文件中所有图片的下边裁切掉一块。用 Adobe Acrobat 只能一页一页地处理，于是想到用 Python 进行批处理。

代码如下：

"""
Title: cutPdfImage
Author: JackieZheng
Date: 2024-09-26 20:51:24
LastEditTime: 2024-09-26 22:14:24
LastEditors:
Description:
FilePath: \\pythonCode\\cutPdfImage.py
"""


from pathlib import Path

import fitz  # pip install pymupdf


def cut_images_and_replace(pdf_path, start_index=148, crop_bottom=150,
                           bottom_margin=20):
    """Crop the bottom of every image on the later pages of a PDF.

    Each image found on a processed page is loaded as a pixmap, its lowest
    ``crop_bottom`` pixel rows are dropped, the original image is deleted
    from the page and the cropped pixmap is inserted in its place. The
    result is written next to the input as ``<name>_processed.pdf``; the
    input file itself is not overwritten.

    Args:
        pdf_path: Path of the PDF to process (``str`` or path-like).
        start_index: 0-based index of the first page to process. The
            default of 148 matches the original ``page_num > 147`` filter.
        crop_bottom: Number of pixel rows removed from the bottom of each
            image.
        bottom_margin: Height, in page units, left free at the bottom of
            the page rectangle the cropped image is inserted into.

    Raises:
        ValueError: If ``crop_bottom`` is negative.
    """
    if crop_bottom < 0:
        raise ValueError("crop_bottom must be non-negative")

    source = Path(pdf_path)
    # with_name/stem instead of pdf_path[:-4]: does not assume a
    # four-character extension and also accepts path-like objects.
    output_path = source.with_name(source.stem + '_processed.pdf')

    doc = fitz.open(pdf_path)
    try:
        # Start the range at the first page of interest instead of
        # visiting every page and skipping most of them.
        for page_num in range(max(start_index, 0), len(doc)):
            page = doc[page_num]
            for img_index, img in enumerate(page.get_images(full=True)):
                xref = img[0]  # first item of each image entry is its xref
                pix = fitz.Pixmap(doc, xref)

                # Pixmap.copy only supports gray/RGB sources, so convert
                # CMYK (four or more colour components) to RGB first.
                if pix.n - pix.alpha >= 4:
                    pix = fitz.Pixmap(fitz.csRGB, pix)

                # Size of the image after cropping.
                new_width = pix.width
                new_height = pix.height - crop_bottom
                if new_height <= 0:
                    # Nothing would remain of this image; leave it as is.
                    print(
                        f"Skipped image {img_index + 1} on page {page_num + 1}: "
                        f"height {pix.height} is not larger than crop {crop_bottom}."
                    )
                    continue

                # Create the target pixmap and copy the kept region into it.
                keep_area = fitz.IRect(0, 0, new_width, new_height)
                new_pix = fitz.Pixmap(fitz.csRGB, keep_area, pix.alpha)
                new_pix.copy(pix, keep_area)

                # Remove the original image, then insert the cropped one.
                page.delete_image(xref)
                page.insert_image(
                    fitz.Rect(0, 0, page.rect.width,
                              page.rect.height - bottom_margin),
                    pixmap=new_pix,
                    keep_proportion=True)
                print(
                    f"Processed and replaced image {img_index + 1} on page {page_num + 1}."
                )
        # garbage=4 drops unreferenced/duplicate objects and deflate=True
        # compresses streams, to limit how much the output file grows.
        doc.save(str(output_path), garbage=4, deflate=True)
    finally:
        # Release the document even if processing or saving failed.
        doc.close()


# Input PDF to process; adjust this path before running the script.
pdf_file_path = "C:\\Users\\JackieZheng\\Desktop\\22.pdf"
cut_images_and_replace(pdf_path=pdf_file_path)

PS：处理后的文件会变大，有兴趣的同学可以优化一下（例如保存时使用 doc.save(..., garbage=4, deflate=True) 清理无用对象并压缩数据流）。

用python裁切PDF文件中的图片

猜你喜欢