1.使用PyPDF2:
from PyPDF2.pdf import PdfFileReader,PdfFileWriter
将文件夹中的pdf.py复制粘贴到D:\python3.6.5\Lib中:
使用pyPdf会报错如下:
2.利用上篇文章下载到的两篇pdf合并,会报错:
Traceback (most recent call last):
  File "D:\python3.6.5\lib\PyPDF2\generic.py", line 484, in readFromStream
    return NameObject(name.decode('utf-8'))
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xcb in position 8: invalid continuation byte

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "D:/python tests/ZQfd_paiming/test.py", line 70, in <module>
    dl.merge_pdf()
  File "D:/python tests/ZQfd_paiming/test.py", line 60, in merge_pdf
    output.write(outputStream)
  File "D:\python3.6.5\lib\PyPDF2\pdf.py", line 482, in write
    self._sweepIndirectReferences(externalReferenceMap, self._root)
  File "D:\python3.6.5\lib\PyPDF2\pdf.py", line 571, in _sweepIndirectReferences
    self._sweepIndirectReferences(externMap, realdata)
  File "D:\python3.6.5\lib\PyPDF2\pdf.py", line 547, in _sweepIndirectReferences
    value = self._sweepIndirectReferences(externMap, value)
  File "D:\python3.6.5\lib\PyPDF2\pdf.py", line 571, in _sweepIndirectReferences
    self._sweepIndirectReferences(externMap, realdata)
  File "D:\python3.6.5\lib\PyPDF2\pdf.py", line 547, in _sweepIndirectReferences
    value = self._sweepIndirectReferences(externMap, value)
  File "D:\python3.6.5\lib\PyPDF2\pdf.py", line 556, in _sweepIndirectReferences
    value = self._sweepIndirectReferences(externMap, data[i])
  File "D:\python3.6.5\lib\PyPDF2\pdf.py", line 571, in _sweepIndirectReferences
    self._sweepIndirectReferences(externMap, realdata)
  File "D:\python3.6.5\lib\PyPDF2\pdf.py", line 547, in _sweepIndirectReferences
    value = self._sweepIndirectReferences(externMap, value)
  File "D:\python3.6.5\lib\PyPDF2\pdf.py", line 547, in _sweepIndirectReferences
    value = self._sweepIndirectReferences(externMap, value)
  File "D:\python3.6.5\lib\PyPDF2\pdf.py", line 547, in _sweepIndirectReferences
    value = self._sweepIndirectReferences(externMap, value)
  File "D:\python3.6.5\lib\PyPDF2\pdf.py", line 577, in _sweepIndirectReferences
    newobj = data.pdf.getObject(data)
  File "D:\python3.6.5\lib\PyPDF2\pdf.py", line 1611, in getObject
    retval = readObject(self.stream, self)
  File "D:\python3.6.5\lib\PyPDF2\generic.py", line 66, in readObject
    return DictionaryObject.readFromStream(stream, pdf)
  File "D:\python3.6.5\lib\PyPDF2\generic.py", line 579, in readFromStream
    value = readObject(stream, pdf)
  File "D:\python3.6.5\lib\PyPDF2\generic.py", line 60, in readObject
    return NameObject.readFromStream(stream, pdf)
  File "D:\python3.6.5\lib\PyPDF2\generic.py", line 492, in readFromStream
    raise utils.PdfReadError("Illegal character in Name Object")
PyPDF2.utils.PdfReadError: Illegal character in Name Object
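3.原来的pdf是1.5版本。将该pdf转换为word,再在WPS中把word导出为pdf,此时得到的pdf是1.7版本,合并时就不再报错。不过这不一定是版本的问题:从上面的报错来看(字节0xcb无法按utf-8解码,随后抛出"Illegal character in Name Object"),更可能是原pdf中某个Name对象含有非UTF-8编码的字节,也就是pdf内部的编码问题;只是目前还没有找到查看pdf内部编码的方式。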
# -*- coding: utf-8 -*-
"""Merge several PDF files into one PDF with PyPDF2."""
from contextlib import ExitStack

from PyPDF2.pdf import PdfFileReader, PdfFileWriter

# Source PDFs, appended to the output in this order
# (the same path is listed twice here, so its pages are written twice).
infilelist = [
    'D:/python tests/ZQfd_paiming/pdf/12.pdf',
    'D:/python tests/ZQfd_paiming/pdf/12.pdf',
]
outfile = 'D:/python tests/ZQfd_paiming/pdf/zong.pdf'

pdffilewriter = PdfFileWriter()

# PdfFileWriter.write() still reads objects from the source streams, so every
# input file has to stay open until the output is written. ExitStack then
# closes all of them (the original never closed any handle).
with ExitStack() as stack:
    for infile in infilelist:
        pdfreader = PdfFileReader(stack.enter_context(open(infile, 'rb')))
        numpages = pdfreader.getNumPages()
        for i in range(numpages):
            pdffilewriter.addPage(pdfreader.getPage(i))

    # Closing the output explicitly guarantees the merged PDF is flushed.
    with open(outfile, 'wb') as outstream:
        pdffilewriter.write(outstream)
# -*- coding: utf-8 -*-
"""Split and merge PDF files with PyPDF2.

PyPDF2 is imported inside the functions that use it, so this module (and
``find_pdf``, which only needs ``os``) can be imported without PyPDF2.
"""
import os
from contextlib import ExitStack


def _open_reader(stream):
    """Wrap an already opened binary stream in a ``PdfFileReader``."""
    from PyPDF2 import PdfFileReader
    return PdfFileReader(stream)


def _write_pages(reader, page_numbers, out_name):
    """Write the given 0-based pages of *reader* to a new PDF at *out_name*.

    The stream behind *reader* must still be open when this is called.
    """
    from PyPDF2 import PdfFileWriter
    writer = PdfFileWriter()
    for number in page_numbers:
        writer.addPage(reader.getPage(number))
    with open(out_name, 'wb') as out_stream:
        writer.write(out_stream)


def _output_name(pdf, tag):
    """Return ``<pdf without its extension><tag>.pdf``.

    ``os.path.splitext`` only strips the final extension, so dots elsewhere
    in the path (directory names, version numbers) are preserved.
    """
    return os.path.splitext(pdf)[0] + tag + '.pdf'


def split_pdf_1(pdf, start, end):
    """Extract pages ``[start, end)`` (0-based) of *pdf* into ``<name>_1_.pdf``.

    Prints a message and returns without writing anything when the range is
    invalid (negative start, start > end, or end beyond the last page).
    """
    if start < 0:
        # A negative index would silently address pages from the end.
        print('start<0')
        return
    if start > end:
        print('start>end')
        return
    with open(pdf, 'rb') as stream:
        reader = _open_reader(stream)
        page_count = reader.getNumPages()  # pages are numbered from 0
        if end > page_count:
            print('end>page_count')
            return
        _write_pages(reader, range(start, end), _output_name(pdf, '_1_'))


def split_pdf_2(pdf, num):
    """Split *pdf* into *num* parts, written as ``<name>_2_<i>.pdf``.

    Every part gets ``page_count // num`` pages; the last part also takes
    the remainder. Prints a message and returns when *num* is less than 2
    or larger than the number of pages.
    """
    if num < 2:
        print('切分份数需要大于1')
        return
    with open(pdf, 'rb') as stream:
        reader = _open_reader(stream)
        page_count = reader.getNumPages()
        if num > page_count:
            # Otherwise per_page would be 0 and all but the last part empty.
            print('num>page_count')
            return
        per_page = page_count // num  # pages per part
        for i in range(num):
            first = per_page * i
            # The final part absorbs whatever is left over.
            last = per_page * (i + 1) if i != num - 1 else page_count
            _write_pages(reader, range(first, last),
                         _output_name(pdf, '_2_' + str(i)))


def split_pdf_3(pdf, per_page):
    """Split *pdf* into parts of *per_page* pages, written as ``<name>_3_<i>.pdf``.

    The last part holds the remaining pages; no empty trailing file is
    written when the page count is an exact multiple of *per_page*. Prints a
    message and returns when *per_page* is below 1 or exceeds the page count.
    """
    if per_page < 1:
        # Guards against division by zero / nonsensical negative sizes.
        print('per_page<1')
        return
    with open(pdf, 'rb') as stream:
        reader = _open_reader(stream)
        page_count = reader.getNumPages()
        if per_page > page_count:
            print('per_page>page_count')
            return
        for i, first in enumerate(range(0, page_count, per_page)):
            last = min(first + per_page, page_count)
            _write_pages(reader, range(first, last),
                         _output_name(pdf, '_3_' + str(i)))


def find_pdf(filepath):
    """Return the paths of all ``.pdf`` files found under *filepath*.

    The directory tree is traversed with ``os.walk``; the extension check is
    case-insensitive. A missing directory yields an empty list.
    """
    pdf_list = []
    for root, _dirs, files in os.walk(filepath):
        for file_name in files:
            if file_name.lower().endswith('.pdf'):
                pdf_list.append(os.path.join(root, file_name))
    return pdf_list


def merge_pdf(filepath, outfile):
    """Merge every PDF found under *filepath* into the single file *outfile*.

    *outfile* itself is skipped as an input, so re-running the merge with the
    output stored inside *filepath* does not merge the previous result again.
    """
    from PyPDF2 import PdfFileWriter

    def _key(path):
        # Normalised form for comparing paths.
        return os.path.normcase(os.path.abspath(path))

    output = PdfFileWriter()
    outputPages = 0
    out_key = _key(outfile)
    pdf_file_name = [p for p in find_pdf(filepath) if _key(p) != out_key]

    # The writer still reads from the source streams while writing, so all
    # inputs stay open until the output is complete; ExitStack closes them.
    with ExitStack() as stack:
        for each in pdf_file_name:
            print(each)
            # Open the source PDF.
            reader = _open_reader(stack.enter_context(open(each, "rb")))
            # Number of pages in this source PDF.
            page_count = reader.getNumPages()
            outputPages += page_count
            print(page_count)
            # Append every page to the output.
            for iPage in range(0, page_count):
                output.addPage(reader.getPage(iPage))

        print("All Pages Number:" + str(outputPages))

        # Finally write the merged PDF.
        with open(outfile, "wb") as outputStream:
            output.write(outputStream)

    print("finished")


if __name__ == '__main__':
    split1 = 'D:/python tests/ZQfd_paiming/pdf/zong.pdf'    # 4 pages
    split2 = 'D:/python tests/ZQfd_paiming/pdf/zong8.pdf'   # 8 pages
    split3 = 'D:/python tests/ZQfd_paiming/pdf/zong16.pdf'  # 16 pages
    # Pages [1, 3) of zong.pdf, saved as zong_1_.pdf.
    split_pdf_1(split1, 1, 3)
    # The 8-page zong8.pdf in 3 parts: 2, 2 and 4 pages.
    split_pdf_2(split2, 3)
    # The 16-page zong16.pdf in parts of 3 pages: 3, 3, 3, 3, 3 and 1 pages.
    split_pdf_3(split3, 3)
    merge_pdf('D:/python tests/ZQfd_paiming/pdf/',
              'D:/python tests/ZQfd_paiming/pdf/zongZZ.pdf')