批量制作博文清单

import re
from urllib.request import urlopen

# Base URL of the blog's article-list pages; the 1-based page number is
# appended to it to form each page's URL.
baseurl = 'https://blog.csdn.net/gf_lvah/article/list/'
# Intended number of article-list pages to crawl.
pages = 7


def get_page(url):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    Args:
        url: The URL to open (anything ``urlopen`` accepts).

    Returns:
        The full response body as a ``str``.

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # Use the response as a context manager so the underlying connection is
    # closed deterministically, even if reading or decoding fails.
    with urlopen(url) as response:
        return response.read().decode('utf-8')


def parse_content(content):
    """Extract article links from the HTML of one article-list page.

    Args:
        content: HTML text of the page.

    Returns:
        A list of ``(href, title)`` string tuples, one per anchor that
        matches the expected markup, in order of appearance. The list is
        empty when nothing matches.
    """
    # Group 1 captures the href; group 2 captures the text that follows the
    # "article-type" span inside the anchor.
    article_link = re.compile(
        r' <a href="(.*?)" target="_blank">\s+<span class="article-type type-1">\s+.*?</span>\s+(.*?)\s+</a>'
    )
    return article_link.findall(content)


# Accumulates (url, title) tuples from every list page, in crawl order.
# The name is kept as-is because it is a module-level global of this script.
List = []

# Crawl pages 1..pages. The page count comes from the `pages` constant
# instead of a second hard-coded literal, so the two cannot drift apart.
for page_no in range(1, pages + 1):
    url = baseurl + str(page_no)
    print(url)
    content = get_page(url)

    print("爬取第%d页" % page_no)
    # Parse once and reuse the result for both the progress output and the
    # accumulated list.
    entries = parse_content(content)
    print(entries)
    List.extend(entries)

# Write one Markdown link per article, numbered downward from the total
# count (the first collected entry gets the highest number).
# The encoding is explicit because the titles and the line template contain
# Chinese text; the platform default encoding may not be able to represent it.
with open('csdn.txt', 'w', encoding='utf-8') as f:
    countdown = range(len(List), 0, -1)
    for number, (url, name) in zip(countdown, List):
        print(url, name)
        f.write('[ 第%s篇博文 : %s ](%s)\n\n' % (number, name.strip(), url))


猜你喜欢

转载自blog.csdn.net/gf_lvah/article/details/81171759