爬妹子图的爬虫小程序

import requests
import os
from lxml import etree
from urllib import request
import random
#定义一个函数
def meizitu(url):
    """Download every image from the albums linked on one listing page.

    Fetches the listing page at ``url``, follows the link found in each
    ``<li>`` of the ``wp-list clearfix`` list, and saves every image found on
    the linked album page into a local ``downloads`` directory as a ``.jpg``
    file. File names are the first image's ``alt`` text followed by a random
    number so that images from the same album do not overwrite each other.

    Albums that cannot be fetched or contain no images are skipped, and a
    failed image download is reported and skipped; neither stops the run.

    Args:
        url: Address of the listing page to scrape.

    Raises:
        requests.RequestException: If the listing page itself cannot be
            fetched.
    """
    headers = {
        'Cookie': 'UM_distinctid=1654601b2fc0-05766907b723fb-37664109-144000-1654601b2ff9a9; bdshare_firstime=1534477856447; safedog-flow-item=; CNZZDATA30056528=cnzz_eid%3D1545626971-1534477372-http%253A%252F%252Fwww.meizitu.com%252F%26ntime%3D1534593928',
        'Referer': 'http://www.meizitu.com/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
    }
    # Without a timeout requests may block forever on a stalled server.
    timeout = 10
    download_dir = 'downloads'

    # Create the output directory; exist_ok avoids a check-then-create race.
    os.makedirs(download_dir, exist_ok=True)

    response = requests.get(url, headers=headers, timeout=timeout)
    mzt_ele = etree.HTML(response.text)

    # Each <li> of the listing holds a link to one album page.
    for li_ele in mzt_ele.xpath('//ul[@class="wp-list clearfix"]/li'):
        hrefs = li_ele.xpath('./div/div/a/@href')
        if not hrefs:
            # List item without an album link: nothing to follow.
            continue
        a_href = hrefs[0]

        try:
            response = requests.get(a_href, headers=headers, timeout=timeout)
        except requests.RequestException as exc:
            print('skipping album %s: %s' % (a_href, exc))
            continue
        # Album pages are decoded as gb2312.
        response.encoding = 'gb2312'
        info_ele = etree.HTML(response.text)

        # Primary layout: images live under <div id="picture">.
        img_info = info_ele.xpath('//div[@id="picture"]/p/img/@src')
        img_alts = info_ele.xpath('//div[@id="picture"]/p/img/@alt')
        if not img_alts:
            # Alternative layout: images live under <div class="postContent">.
            img_info = info_ele.xpath('//div[@class="postContent"]/p/img/@src')
            img_alts = info_ele.xpath('//div[@class="postContent"]/p/img/@alt')
        if not img_info or not img_alts:
            # Neither layout yielded a nameable image; skip this album.
            continue

        base_name = img_alts[0]
        for img_url in img_info:
            # Build each name from the unchanged base so the random suffixes
            # do not pile up from one image to the next.
            img_name = base_name + str(random.random())
            print(img_name)
            target = os.path.join(download_dir, img_name + '.jpg')
            try:
                request.urlretrieve(img_url, target)
            except (OSError, ValueError) as exc:
                # URLError/HTTPError and file-system errors are OSError;
                # ValueError covers malformed URLs. Report and keep going.
                print('failed to download %s: %s' % (img_url, exc))


if __name__ == "__main__":
    # Script entry point: scrape a single, fixed listing page.
    url = "http://www.meizitu.com/tag/quanluo_4_2.html"
    meizitu(url)



猜你喜欢

转载自blog.csdn.net/yangbenhao/article/details/81842230
今日推荐