Python学习(爬虫出问题,网址更新了)

1、爬取网址的图片

import urllib.request
import os

def get_page(url):
    """Fetch *url* and return the page-number text embedded in its HTML.

    The number is located by searching for the marker ``'righttext'``,
    skipping 23 characters from the start of that marker, and reading up
    to the next ``']'``.
    NOTE(review): the marker and the 23-character offset are tied to one
    specific page layout -- confirm against the live site's markup.

    Args:
        url: Address of the page to download.

    Returns:
        The extracted substring (the caller converts it with ``int``).

    Raises:
        ValueError: If the marker or the closing ``']'`` is not found.
    """
    req = urllib.request.Request(url)
    req.add_header(
        'User-Agent',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36',
    )
    # Open the Request object (not the bare URL) so the User-Agent header
    # is actually sent, and close the response when done.
    with urllib.request.urlopen(req) as response:
        html = response.read().decode('utf-8')

    marker = html.find('righttext')
    if marker == -1:
        raise ValueError("marker 'righttext' not found in page")
    start = marker + 23
    end = html.find(']', start)
    if end == -1:
        raise ValueError("closing ']' not found after marker")
    # Slice (start:end) rather than tuple-index, and return the value so
    # callers can use it.
    return html[start:end]
def find_imgs(url):
    """Placeholder for image-address extraction; not implemented yet.

    Accepts the page URL and currently does nothing, returning ``None``.
    """
    return None
def save_imgs(folder, img_addrs):
    """Placeholder for saving images; not implemented yet.

    Accepts a folder name and a collection of image addresses and
    currently does nothing, returning ``None``.
    """
    return None

def download_mm(folder='zoo', pages=5):
    """Walk backwards from the newest page and hand each page to the helpers.

    Creates *folder* (if needed), changes the working directory into it,
    reads the latest page number via ``get_page``, then for up to *pages*
    consecutive pages calls ``find_imgs`` and ``save_imgs``.

    Args:
        folder: Directory to create and switch into; also passed to
            ``save_imgs``.
        pages: Maximum number of pages to process, counting down from the
            latest one.
    """
    # exist_ok avoids a crash when the script is run a second time.
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)
    url = "http://jandan.net/zoo"
    latest_page = int(get_page(url))
    for offset in range(pages):
        # Subtract the offset from the fixed starting page; decrementing a
        # running counter by the loop index would skip pages (0, 1, 3, 6...).
        page_num = latest_page - offset
        if page_num < 1:
            break
        # NOTE(review): '/page-N' is the assumed path scheme for this site;
        # the original concatenation had no separator at all -- confirm.
        page_url = url + '/page-' + str(page_num) + '#comments'
        img_addrs = find_imgs(page_url)
        # save_imgs takes (folder, img_addrs); the folder must be passed.
        save_imgs(folder, img_addrs)
# Start the download with default arguments only when run as a script.
if __name__ == "__main__":
    download_mm()
发布了26 篇原创文章 · 获赞 12 · 访问量 1767

猜你喜欢

转载自blog.csdn.net/y_j_6666/article/details/104179422