2018年6月10日笔记

  • html转pdf文件

抓取aming的linux教程:先请求目录页,再从目录列表中提取每一章网页的地址。

 1 import re
 2 import requests
 3 
 4 reg = re.compile(r"<h3>目录列表</h3>\s+<ul>\s+([\s\S]*?</ul>)")
 5 url = "http://www.apelearn.com/study_v2/"
 6 session = requests.session()
 7 r = session.get(url)
 8 # print(r.encoding)
 9 r.encoding = "utf-8"
10 html = r.text
11 # print(html)
12 htmlli = reg.findall(html)
13 # print(htmlli)
14 regurl = re.compile(r'''href="(.*?)"''')
15 if htmlli[0]:
16     result = regurl.findall(htmlli[0])
17     # print(result)
18 
19 urls = list()
20 for i in result:
21     url = "http://www.apelearn.com/study_v2/{0}".format(i)
22     print(url)
23     urls.append(urls)
http://www.apelearn.com/study_v2/chapter1.html
http://www.apelearn.com/study_v2/chapter2.html
http://www.apelearn.com/study_v2/chapter3.html
http://www.apelearn.com/study_v2/chapter4.html
http://www.apelearn.com/study_v2/chapter5.html
http://www.apelearn.com/study_v2/chapter6.html
http://www.apelearn.com/study_v2/chapter7.html
http://www.apelearn.com/study_v2/chapter8.html
http://www.apelearn.com/study_v2/chapter9.html
http://www.apelearn.com/study_v2/chapter10.html
http://www.apelearn.com/study_v2/chapter11.html
http://www.apelearn.com/study_v2/chapter12.html
http://www.apelearn.com/study_v2/chapter13.html
http://www.apelearn.com/study_v2/chapter14.html
http://www.apelearn.com/study_v2/chapter15.html
http://www.apelearn.com/study_v2/chapter16.html
http://www.apelearn.com/study_v2/chapter17.html
http://www.apelearn.com/study_v2/chapter18.html
http://www.apelearn.com/study_v2/chapter19.html
http://www.apelearn.com/study_v2/chapter20.html
http://www.apelearn.com/study_v2/chapter21.html
http://www.apelearn.com/study_v2/chapter22.html
http://www.apelearn.com/study_v2/chapter23.html
http://www.apelearn.com/study_v2/chapter24.html
http://www.apelearn.com/study_v2/chapter25.html
http://www.apelearn.com/study_v2/chapter26.html

生成一个空白的PDF,再把aming目录下各章节的PDF按文件名中的数字顺序合并进去

 1 import codecs
 2 import PyPDF2
 3 import os
 4 
 5 files = list()
 6 for fileName in os.listdir("aming"):
 7     if fileName.endswith(".pdf"):
 8         files.append(fileName)
 9 
10 newFiles = sorted(files, key=lambda d: int(d.split(".pdf")[0]))
11 print(newFiles)
12 
13 
14 os.chdir("aming")
15 pdfWriter = PyPDF2.PdfFileWriter()#生成一个空白的pdf
16 for item in newFiles:
17     pdfReader = PyPDF2.PdfFileReader(open(item, "rb"))
18     for page in range(pdfReader.numPages):
19         pdfWriter.addPage(pdfReader.getPage(page))
20 
21 with codecs.open("aminglinux.pdf", "wb") as f:
22     pdfWriter.write(f)
[]
  • python3使用模块PIL来处理图片
from PIL import Image

# Open the sample picture, report its basic properties and preview it.
image = Image.open("test.jpg")
print(image.format, image.size, image.mode)
image.show()

# Region of interest as a (left, upper, right, lower) box.
position = (320, 65, 460, 220)

# Cut the region out and preview it on its own.
cutjpg = image.crop(position)
cutjpg.show()

# Cut the same region again, turn it upside down and paste it back in place.
region = image.crop(position)
cutjpg = region.transpose(Image.ROTATE_180)
image.paste(cutjpg, position)
image.show()

# Shrink to a width of 30 pixels, scaling the height by the same ratio.
x, y = image.size
newx = 30
newy = int(y * newx / x)
newimage = image.resize((newx, newy))
newimage.show()


#Image的常用方法
#image.show()                                  #显示图片
#Image.open(file)                              #打开图片文件,返回Image对象
#image.save(outputfile)                        #保存图片
#image.crop((left, upper, right, lower))       #抠图,参数是一个四元组

# Image的几何处理:
# out = im.resize((128, 128))                     #调整图片大小
# out = im.rotate(45)                             #逆时针旋转 45 度角。
# out = im.transpose(Image.FLIP_LEFT_RIGHT)       #左右对换。
# out = im.transpose(Image.FLIP_TOP_BOTTOM)       #上下对换。
# out = im.transpose(Image.ROTATE_90)             #旋转 90 度角。
# out = im.transpose(Image.ROTATE_180)            #旋转 180 度角。
# out = im.transpose(Image.ROTATE_270)            #旋转 270 度角。
import random
import string

from PIL import Image, ImageFont, ImageDraw, ImageFilter

# Settings read by getNumber, getLine and getCode.
font_path = "msyh.ttf"  # font file handed to ImageFont.truetype
number = 4  # how many characters getNumber generates
size = (100, 30)  # image size, unpacked as (width, height) in getCode
bgcolor = (255, 255, 255)  # background colour given to Image.new
fontcolor = (0, 0, 255)  # fill colour of the drawn text
linecolor = (255, 0, 0)  # fill colour of the interference lines
draw_line = True  # whether getCode draws interference lines at all
# Number of interference lines to draw (a single count, not a lower/upper range)
line_number = 30
15 
16 
def getNumber(length=None):
    """Return a random string of ASCII letters and digits.

    Args:
        length: Number of characters to generate. When omitted, the
            module-level ``number`` setting is used.

    Returns:
        The generated string. Characters are drawn independently, so a
        character may repeat and ``length`` may exceed the alphabet size.
    """
    if length is None:
        length = number
    source = string.ascii_letters + string.digits
    # random.sample() never repeats a character and raises ValueError once
    # more characters are requested than the alphabet holds; choices() draws
    # with replacement and has neither restriction.
    return "".join(random.choices(source, k=length))
22 
def getLine(draw, width, height):
    """Draw one interference line between two random points.

    Both endpoints are picked with random.randint, x in [0, width] and
    y in [0, height], and the line is filled with ``linecolor``.
    """
    # Each point draws its x before its y, first the start and then the end.
    endpoints = [
        (random.randint(0, width), random.randint(0, height))
        for _ in range(2)
    ]
    draw.line(endpoints, fill=linecolor)
28 
def getCode():
    """Render a random verification code and save it as 'idencode.png'.

    Returns:
        The text drawn into the image, so the caller can compare a user's
        answer against it.
    """
    width, height = size
    image = Image.new("RGBA", size, bgcolor)
    font = ImageFont.truetype(font_path, 25)
    draw = ImageDraw.Draw(image)
    text = getNumber()

    # FreeTypeFont.getsize() was removed in Pillow 10, so measure with
    # getbbox() when it exists and fall back to getsize() on older releases.
    if hasattr(font, "getbbox"):
        left, top, right, bottom = font.getbbox(text)
    else:
        left, top = 0, 0
        right, bottom = font.getsize(text)
    font_width, font_height = right - left, bottom - top
    # Shift by (left, top) so the inked box, not the drawing origin, is centred.
    # NOTE(review): placement can differ by a few pixels from the
    # getsize()-based original -- confirm the rendered image looks right.
    origin = (
        (width - font_width) / 2 - left,
        (height - font_height) / 2 - top,
    )
    draw.text(origin, text, font=font, fill=fontcolor)  # draw the code text

    if draw_line:
        for _ in range(line_number):
            getLine(draw, width, height)

    # Optional distortion step, left disabled as in the original notes:
    # image = image.transform((width + 20, height + 10), Image.AFFINE, (1, -0.3, 0, -0.1, 1, 0), Image.BILINEAR)
    image = image.filter(ImageFilter.EDGE_ENHANCE_MORE)  # edge-enhance filter
    image.save('idencode.png')  # write the captcha image to disk
    return text


if __name__ == '__main__':
    getCode()

猜你喜欢

转载自www.cnblogs.com/karl-python/p/9170522.html