想把一个 PDF 文件中所有图片的下边缘裁切掉一块,但用 Adobe Acrobat 只能一页一页地处理,于是想到用 Python 进行批处理。
代码如下:
"""
Title: cutPdfImage
Author: JackieZheng
Date: 2024-09-26 20:51:24
LastEditTime: 2024-09-26 22:14:24
LastEditors:
Description:
FilePath: \\pythonCode\\cutPdfImage.py
"""
import fitz # pip install pymupdf
def cut_images_and_replace(pdf_path, start_page=148, crop_bottom=150,
                           bottom_margin=20):
    """Crop the bottom of every image on the selected pages and save a copy.

    For each page from ``start_page`` onwards, every image listed by
    ``page.get_images`` is loaded as a pixmap, its lowest ``crop_bottom``
    pixel rows are dropped, the original image is deleted from the page and
    the cropped pixmap is inserted into the page rectangle (minus
    ``bottom_margin`` points at the bottom), keeping its proportions.
    The result is written next to the input as ``<name>_processed.pdf``;
    the input file itself is not overwritten.

    Args:
        pdf_path: Path of the PDF file to process.
        start_page: 0-based index of the first page to process. The default
            (148) matches the original hard-coded ``page_num > 147`` check.
        crop_bottom: Number of pixel rows to remove from the bottom of each
            image.
        bottom_margin: Space (in page units) left free at the bottom of the
            page when the cropped image is re-inserted.
    """
    # Local import keeps this function self-contained; the file's import
    # section only brings in ``fitz``.
    import os

    # splitext copes with any extension length (or none), unlike a fixed
    # ``pdf_path[:-4]`` slice.
    root, _ext = os.path.splitext(pdf_path)
    output_path = root + '_processed.pdf'

    # The context manager closes the document even if processing fails.
    with fitz.open(pdf_path) as doc:
        for page_num in range(max(start_page, 0), len(doc)):
            page = doc[page_num]
            for img_index, img in enumerate(page.get_images(full=True)):
                xref = img[0]
                pix = fitz.Pixmap(doc, xref)

                # Size of the image after cropping.
                new_width = pix.width
                new_height = pix.height - crop_bottom
                if new_height <= 0:
                    # Image is not taller than the crop amount: there is
                    # nothing sensible left to keep, so leave it untouched.
                    continue

                # Pixel region of the source image that is kept (top part).
                keep_area = fitz.IRect(0, 0, new_width, new_height)

                # Create the target pixmap and copy the kept region into it.
                new_pix = fitz.Pixmap(fitz.csRGB, keep_area, pix.alpha)
                new_pix.copy(pix, keep_area)

                # Remove the original image, then insert the cropped one.
                page.delete_image(xref)
                page.insert_image(
                    (0, 0, page.rect.width,
                     page.rect.height - bottom_margin),
                    pixmap=new_pix,
                    keep_proportion=True)
                print(
                    f"Processed and replaced image {img_index + 1} on page {page_num + 1}."
                )

        # garbage=4 removes objects that are no longer referenced and merges
        # duplicates; deflate=True compresses streams. Both options only
        # affect file size, not the visible content.
        doc.save(output_path, garbage=4, deflate=True)
if __name__ == "__main__":
    # Run the batch job only when this file is executed as a script, so that
    # importing the module does not start processing a hard-coded file.
    pdf_file_path = "C:\\Users\\JackieZheng\\Desktop\\22.pdf"
    cut_images_and_replace(pdf_file_path)
PS:处理后的文件体积会变大,有兴趣的同学可以优化一下(例如保存时尝试使用 `doc.save(..., garbage=4, deflate=True)` 来清理无用对象并压缩数据流)。