一、项目简介
1.内容:抓取“斗图啦”网站的表情包并保存到本地
2.解析方式:XPath
3.涉及类库:lxml、requests
二、代码
# python
# -*- coding:utf-8 -*-
# author:Only time:2019/8/19
import os

import requests
from lxml import etree
def get_url(first_page=1, last_page=2):
    """Build the list-page URLs to scrape.

    Args:
        first_page: Number of the first listing page (inclusive).
        last_page: Number of the last listing page (inclusive).

    Returns:
        A list of URL strings, one per page, in ascending page order.
        The list is empty when ``last_page`` is smaller than ``first_page``.
        With the defaults this yields pages 1 and 2.
    """
    base = 'https://www.doutula.com/photo/list/?page='
    return [base + str(page) for page in range(first_page, last_page + 1)]
def _safe_filename(name):
    """Return ``name`` with characters that are unsafe in file names replaced.

    Path separators, Windows-reserved punctuation and line breaks become
    underscores so a caption can never point outside the target directory.
    An empty result falls back to a single underscore.
    """
    forbidden = '\\/:*?"<>|\r\n\t'
    cleaned = ''.join('_' if ch in forbidden else ch for ch in name).strip()
    return cleaned or '_'


def url_html_parse_save():
    """Download the images found on each listing page into ``./图片``.

    For every URL returned by ``get_url()`` the page is fetched and parsed,
    and each anchor under the ``page-content text-center`` div that carries
    both a ``<p>`` caption and an ``<img data-original=...>`` is saved as
    ``<caption>.jpg``. The HTTP status code of each page, each caption and a
    per-page completion message are printed to stdout.

    Pages answering with an error status are skipped. Images that cannot be
    downloaded or written are reported and skipped, so one failure does not
    abort the run.

    Raises:
        requests.RequestException: If a listing page cannot be fetched at all
            (connection error or timeout).
    """
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE'
    }
    timeout = 10  # seconds; without it a stalled connection blocks forever
    save_dir = './图片'
    # open() does not create missing directories, so make sure it exists.
    os.makedirs(save_dir, exist_ok=True)

    for number, url in enumerate(get_url(), start=1):
        response = requests.get(url, headers=header, timeout=timeout)
        print(response.status_code)
        if not response.ok:
            # An error page holds no images; do not report it as saved.
            continue

        html = etree.HTML(response.text)
        anchors = html.xpath('//div[@class="page-content text-center"]//a')
        for anchor in anchors:
            names = anchor.xpath('.//p/text()')
            sources = anchor.xpath(
                './/img[@referrerpolicy="no-referrer"]/@data-original')
            if not names or not sources:
                # Anchor without a caption or image (presumably a navigation
                # link): indexing [0] here would raise IndexError.
                continue

            name = names[0]
            print(name)
            # NOTE: files always get a .jpg suffix, whatever the real format.
            target = os.path.join(save_dir, '%s.jpg' % _safe_filename(name))
            try:
                image_response = requests.get(
                    sources[0], headers=header, timeout=timeout)
                image_response.raise_for_status()
                with open(target, 'wb') as f:
                    f.write(image_response.content)  # raw image bytes
            except (requests.RequestException, OSError) as exc:
                print('下载失败: %s (%s)' % (name, exc))
                continue

        print('第' + str(number) + '页保存成功')
def run():
    """Entry point: scrape the listing pages and save their images."""
    url_html_parse_save()
# Start the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    run()