Python 2.7 中多线程使用举例
下面的代码都不难理解,不做多余解释;唯一可能让人困惑的地方已经在源码的注释中说明。这里也不讲解多线程编程的基础知识。把这几种形式(主要是第三种)练成肌肉记忆(muscle memory)就行了。整理在这里,一是为了偶尔生疏时方便查找,二是做知识分享。
一、使用thread模块
import requests
import thread
import time
# Extra HTTP headers passed to every requests.get() call in this script.
headers = {
    'User-Agent': 'mySpider-please let me try',
    'Version': '1.0',
}
def spider(url):
    """Fetch ``url`` and print a one-line summary of the response.

    The printed line contains the HTTP status code, the length of the
    response body (``r.content``) and the current local time as HH:MM:SS.

    :param url: address of the page to request.
    """
    r = requests.get(url, headers=headers)
    # time.ctime() pads single-digit days with an extra space
    # ('Sun Jun  2 10:20:30 2019'), so ctime().split(' ')[3] yields the day
    # of month instead of the clock time on days 1-9. strftime returns the
    # HH:MM:SS field directly, whatever the date is.
    stamp = time.strftime('%H:%M:%S')
    # One pre-formatted string prints the same text as "print a, b, c".
    print('%s %s %s' % (r.status_code, len(r.content), stamp))
def main():
    """Fetch pages 2304-2309, one low-level thread per page, and wait for all.

    The ``thread`` module has no join(): threads started with
    start_new_thread() are normally killed as soon as the main thread exits.
    Each worker therefore gets its own lock, acquired before the thread is
    started and released when the worker finishes; main() blocks on every
    lock before returning so no request is cut off mid-flight.
    """
    def run_and_signal(url, done):
        # Release the lock even when spider() raises, so main() cannot hang.
        try:
            spider(url)
        finally:
            done.release()

    pending = []
    for i in range(2304, 2310):
        url = 'http://xxxx.net/ooxx/page-' + str(i)
        done = thread.allocate_lock()
        done.acquire()
        pending.append(done)
        thread.start_new_thread(run_and_signal, (url, done))
        time.sleep(0.1)  # stagger the thread start-up

    # Re-acquiring a lock blocks until its worker has released it.
    for done in pending:
        done.acquire()
# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
二、使用threading模块
import requests
import threading
import time
# Extra HTTP headers passed to every requests.get() call in this script.
headers = {
    'User-Agent': 'mySpider-please let me try',
    'Version': '1.1',
}
def spider(url):
    """Fetch ``url``, print a one-line summary, then pause briefly.

    The printed line contains the HTTP status code, the length of the
    response body (``r.content``) and the current local time as HH:MM:SS.

    :param url: address of the page to request.
    """
    r = requests.get(url=url, headers=headers)
    # time.ctime() pads single-digit days with an extra space
    # ('Sun Jun  2 10:20:30 2019'), so ctime().split(' ')[3] yields the day
    # of month instead of the clock time on days 1-9. strftime returns the
    # HH:MM:SS field directly, whatever the date is.
    stamp = time.strftime('%H:%M:%S')
    # One pre-formatted string prints the same text as "print a, b, c".
    print('%s %s %s' % (r.status_code, len(r.content), stamp))
    time.sleep(0.1)
def main():
    """Fetch pages 2304-2309 with one threading.Thread per page.

    All threads are created first, then started, then joined, so main()
    returns only after every spider() call has finished.
    """
    page_urls = ['http://xxxx.net/ooxx/page-' + str(page)
                 for page in range(2304, 2310)]
    workers = [threading.Thread(target=spider, args=(page_url,))
               for page_url in page_urls]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
三、threading模块+Queue模块
import requests
import threading
import time
from Queue import Queue
class XxxxSpider(threading.Thread):
    """Worker thread that takes page URLs from a shared queue and prints them.

    :param queue: queue of page URLs; the same queue object is handed to
        every worker.
    """

    def __init__(self, queue):
        threading.Thread.__init__(self)
        self._queue = queue

    def run(self):
        """Print URLs taken from the queue until the queue is exhausted.

        start() makes run() execute in a new thread; calling run() directly
        executes it in the current thread. start() uses the low-level thread
        module's start_new_thread to run a bootstrap method, which does some
        preparation and then calls run().
        """
        # Imported locally so the block does not depend on the file header;
        # the module is named Queue on Python 2 and queue on Python 3.
        try:
            from Queue import Empty
        except ImportError:
            from queue import Empty

        # Testing empty() and then calling get_nowait() is racy: another
        # worker can take the last item between the two calls, and the
        # resulting Empty exception would escape run(). Try the get and treat
        # Empty as the normal "no more work" signal instead.
        while True:
            try:
                page_url = self._queue.get_nowait()
            except Empty:
                break
            print(page_url)
# Extra HTTP headers passed to requests.get() by spider() in this script.
headers = {
    'User-Agent': 'mySpider-please let me try',
    'Version': '1.2',
}
def spider(url):
    """Fetch ``url``, print a one-line summary, then pause briefly.

    The printed line contains the HTTP status code, the length of the
    response body (``r.content``) and the current local time as HH:MM:SS.

    :param url: address of the page to request.
    """
    r = requests.get(url=url, headers=headers)
    # time.ctime() pads single-digit days with an extra space
    # ('Sun Jun  2 10:20:30 2019'), so ctime().split(' ')[3] yields the day
    # of month instead of the clock time on days 1-9. strftime returns the
    # HH:MM:SS field directly, whatever the date is.
    stamp = time.strftime('%H:%M:%S')
    # One pre-formatted string prints the same text as "print a, b, c".
    print('%s %s %s' % (r.status_code, len(r.content), stamp))
    time.sleep(0.1)
def main():
    """Queue pages 2304-2309 and let ten XxxxSpider workers drain the queue.

    Returns once every worker thread has been joined.
    """
    queue = Queue()
    for page in range(2304, 2310):
        queue.put('http://xxxx.net/ooxx/page-' + str(page))

    thread_count = 10
    workers = [XxxxSpider(queue) for _ in xrange(thread_count)]

    # start() makes run() execute in a new thread; calling run() directly
    # would execute it in the current thread. start() uses the low-level
    # thread module's start_new_thread to run a bootstrap method, which does
    # some preparation and then calls run().
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()