# coding=utf-8
"""
Python Blog's Visit Count V2.0
(V1.0 http://blog.csdn.net/change518/article/details/14108511)
By change
2015.11.4
http://blog.csdn.net/change518

First walk the paginated article list and extract the address of every post,
then build HTTP requests against those addresses, using threads for speed.
Every article of the blog is visited once per pass, which raises its visit
count. Because of caching, the visit count only updates after a while.
"""
__author__ = 'change'

import contextlib
import datetime
import re
import threading

try:  # Python 2 module names
    import Queue
    import urllib2
except ImportError:  # Python 3: the same APIs live under new names
    import queue as Queue
    import urllib.request as urllib2

# Number of worker threads
threadNum = 10
threadList = []
# Number of article-list pages to scan (pages 1..pageCount)
pageCount = 4
# Number of requests issued for every article
visitsPerLink = 10
# Seconds after which a stalled request is abandoned instead of blocking
# its worker forever
requestTimeout = 30
# Paths of all articles found, and the work queue of absolute article URLs
myList = list()
myLinks = Queue.Queue()
# Request header (browser-like User-Agent)
requestHeader = {
    'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
                  "(KHTML, like Gecko) Chrome/46.0.2490.80 Safari/537.36"
}
blogHost = 'http://blog.csdn.net'
# A title link on a list page; the capture group is the article path.
articlePattern = re.compile(
    r'<span class="link_title">'
    r'<a href="(/change518/article/details/\d{7,8})">'
)


def extractArticlePaths(html):
    """Return the article paths referenced by title links in ``html``.

    :param html: text of one article-list page
    :return: list of ``/change518/article/details/<id>`` paths, in page
        order; empty when the page contains no title links
    """
    return articlePattern.findall(html)


def fetchPage(url):
    """Download ``url`` with the browser-like header and return its text.

    The response is always closed. The body is decoded leniently because
    only the ASCII link markup is inspected afterwards.

    :param url: absolute URL to fetch
    :return: decoded response body
    :raises IOError: on network failure (``URLError`` is a subclass)
    """
    request = urllib2.Request(url, headers=requestHeader)
    response = urllib2.urlopen(request, timeout=requestTimeout)
    with contextlib.closing(response):
        # NOTE(review): assumes the page is UTF-8 compatible - confirm.
        return response.read().decode('utf-8', 'replace')


def mySpiderThread(j):
    """Worker: take URLs from ``myLinks`` and request each one repeatedly.

    The queue synchronises the workers. Items are taken with
    ``get_nowait()`` rather than ``empty()`` followed by a blocking
    ``get()``: with several workers the queue can be drained between those
    two calls, which would leave the worker blocked forever.

    :param j: index of this worker, printed next to every URL it handles
    :return: None, once the queue is exhausted
    """
    while True:
        try:
            singleLink = myLinks.get_nowait()
        except Queue.Empty:
            return
        request = urllib2.Request(singleLink, headers=requestHeader)
        print(singleLink + " :" + str(j))
        for _ in range(visitsPerLink):
            try:
                response = urllib2.urlopen(request, timeout=requestTimeout)
                # Only the request itself matters; release the socket.
                response.close()
            except IOError as err:
                # A single failed request must not kill the whole worker.
                print(singleLink + " failed: " + str(err))


def main():
    """Collect every article link, visit them all, and print the run time."""
    # Record the start time of the run
    startTime = datetime.datetime.now()

    # Read the paginated article list
    for i in range(1, pageCount + 1):
        url = (blogHost + "/change518/article/list/" + str(i) +
               "?viewmode=contents")
        myList.extend(extractArticlePaths(fetchPage(url)))

    # Put every collected link into the queue
    for linkAddress in myList:
        myLinks.put(blogHost + linkAddress)

    # Create threadNum threads
    for i in range(threadNum):
        threadList.append(
            threading.Thread(target=mySpiderThread, args=(i,)))
    # Start them
    for worker in threadList:
        worker.start()
    # Block until every thread has finished
    for worker in threadList:
        worker.join()
    # If nothing has to happen after all threads are done (such as timing
    # the whole run), each thread could simply be started right after it is
    # created, without keeping a list or joining.

    print('Done')
    # Record the end time and print the elapsed whole seconds;
    # total_seconds() also counts whole days, unlike the .seconds attribute.
    endTime = datetime.datetime.now()
    print(int((endTime - startTime).total_seconds()))


if __name__ == '__main__':
    main()