import re
from urllib.request import urlopen

# One Top-250 list entry: rank id, first title, rating and the text that
# precedes "评价" (the number-of-ratings label).  re.S lets ``.`` span the
# newlines between the tags of a single entry.
_ITEM_PATTERN = re.compile(
    r'<div class="item">.*?<div class="pic">.*?<em .*?>(?P<id>\d+).*?<span class="title">(?P<title>.*?)</span>'
    r'.*?<span class="rating_num" .*?>(?P<rating_num>.*?)</span>.*?<span>(?P<comment_num>.*?)评价</span>',
    re.S,
)


def getPage(url: str) -> str:
    """Download *url* and return the response body decoded as UTF-8.

    The response is used as a context manager so the underlying connection
    is closed even if reading or decoding fails.
    """
    with urlopen(url) as response:
        # The body arrives as bytes; decode it into the string to be matched.
        return response.read().decode('utf-8')


def parsePage(s: str) -> list:
    """Extract every movie entry found in the HTML text *s*.

    Returns a list with one ``(id, title, rating_num, comment_num)`` tuple
    of strings per match, in document order; the list is empty when nothing
    matches.
    """
    return _ITEM_PATTERN.findall(s)


def main(num: int) -> None:
    """Fetch the list page starting at offset *num* and print its entries."""
    url = 'https://movie.douban.com/top250?start=%s&filter=' % num
    response_html = getPage(url)
    ret = parsePage(response_html)
    print(ret)


if __name__ == '__main__':
    # 10 pages of 25 entries each: offsets 0, 25, ..., 225.
    count = 0
    for _ in range(10):
        main(count)
        count += 25