python也提供了线程相关的并发原语,如锁threading.Lock,事件threading.Event,条件变量threading.Condition,信号量
threading.Semaphore.
其实这些Python对象本质上都是对pthread_mutex_t, pthread_cond_t的封装。
而多线程编程同步控制的本质和原理都是一样的。
因此虽然学习的是Python中如何进行多线程编程,其实本质上是语言无关的。
talk is cheap.
我们先通过2个具体的例子来分析和理解python中的多线程编程。
1.实现2个线程交替打印奇偶数
2.实现一个支持并发使用的环形队列
代码1:2个线程交替打印:
import threading
import time

# Two condition variables pass the "turn" back and forth between the threads.
c1 = threading.Condition()
c2 = threading.Condition()

def prt(i, wait, notify, name):
    """Repeatedly wait on `wait`, print the next number, then wake the peer via `notify`.

    i: the first number this thread prints; incremented by 2 each round so the
    two threads produce interleaved odd/even sequences.
    """
    while True:
        with wait:
            wait.wait()
            print(i, name)
            i += 2
        time.sleep(1)
        with notify:
            notify.notify_all()

t1 = threading.Thread(target=prt, args=(0, c1, c2, "thread1", ))  # wait/notify roles alternate
t2 = threading.Thread(target=prt, args=(1, c2, c1, "thread2", ))
t1.start()
t2.start()
# BUG FIX: the original notified c1 immediately after start(); if t1 had not yet
# reached wait(), that notification was lost and both threads deadlocked.
# Give both threads time to block on their conditions before the first notify.
# (A fully robust version would wait on a predicate instead of a bare wait().)
time.sleep(0.5)
with c1:  # pick one thread to run first
    c1.notify_all()
t1.join()
t2.join()
代码2:一个支持并发的环形队列实现
import threading
class RingQueue:
    """A fixed-capacity FIFO ring buffer safe for concurrent producers/consumers.

    One Lock is shared by two Conditions: producers wait on `not_full` while
    the buffer is full, consumers wait on `not_empty` while it is empty.
    """

    def __init__(self, maxsize):
        self._maxsize = maxsize
        self._tail = 0   # next slot to write
        self._head = 0   # next slot to read
        self._len = 0    # number of items currently stored
        self._queue = [None for _ in range(maxsize)]
        self._mutex = threading.Lock()  # guards all of the state above
        self.not_full = threading.Condition(self._mutex)   # wait for a free slot
        self.not_empty = threading.Condition(self._mutex)  # wait for data

    def put(self, item):
        """Insert `item`, blocking while the queue is full.

        Returns the slot index the item was stored in.
        """
        with self.not_full:
            while self._len == self._maxsize:
                self.not_full.wait()
            i = self._tail
            self._queue[i] = item
            self._tail = (self._tail + 1) % self._maxsize
            self._len += 1
            # BUG FIX: the original only notified when the queue *had been*
            # empty, which strands extra consumers when several block at once
            # (two waiters, two puts -> only one wake-up). Notifying after
            # every insert is always safe.
            self.not_empty.notify()
            return i

    def get(self):
        """Remove and return the oldest item, blocking while the queue is empty."""
        with self.not_empty:
            while self._len == 0:
                self.not_empty.wait()
            data = self._queue[self._head]
            self._queue[self._head] = None  # drop the reference so it can be GC'd
            self._head = (self._head + 1) % self._maxsize
            self._len -= 1
            # BUG FIX: as in put(), notify unconditionally so multiple blocked
            # producers are not stranded.
            self.not_full.notify()
            # BUG FIX: the original returned the slot index `i`; the computed
            # `data` was never used. A consumer clearly wants the item itself.
            return data
def producer(q):
    """Endlessly feed batches of 0..9999 into the queue, echoing each slot index."""
    while True:
        for item in range(10000):
            print('put', q.put(item))
def consumer(q):
    """Endlessly drain the queue, echoing every value taken from it."""
    while True:
        print('get', q.get())
# Drive the ring queue with one producer thread and one consumer thread.
q = RingQueue(10)
t1 = threading.Thread(target=producer, args=(q,))
t2 = threading.Thread(target=consumer, args=(q,))
for worker in (t1, t2):
    worker.start()
t1.join()
t2.join()
我们再考虑为上面的队列加入以下需求:
1.我们想知道队列中的所有任务都被消费了,通常在关闭清除队列时需要知道。
我们可以通过在队列中加入另一个条件变量来实现
self.all_tasks_done = threading.Condition(self._mutex)
self.unfinished_tasks = 0
注意,这个新的条件变量和之前用于协调队列长度的锁是同一把锁。
然后增加下面2个方法:
def task_done(self):
    """Mark one previously fetched task as finished.

    Consumers call this after get() once the task's work is complete. When
    the outstanding count reaches zero, any thread blocked in join() is
    released. Raises ValueError if called more times than items were put().
    """
    with self.all_tasks_done:
        remaining = self.unfinished_tasks - 1
        if remaining < 0:
            raise ValueError('task_done() called too many times')
        if remaining == 0:
            self.all_tasks_done.notify_all()
        self.unfinished_tasks = remaining
def join(self):
    """Block until every item ever put() has been marked done via task_done()."""
    with self.all_tasks_done:
        # Re-check after every wakeup: notify_all may race with new put() calls.
        while self.unfinished_tasks != 0:
            self.all_tasks_done.wait()
然后我们再修改put方法,每加一个任务都对unfinished_tasks进行加1.
def put(self, item):
    """Insert `item`, blocking while the queue is full.

    Also counts the item as an unfinished task for task_done()/join().
    Returns the slot index the item was stored in.
    """
    with self.not_full:
        while self._len == self._maxsize:
            self.not_full.wait()
        i = self._tail
        self._queue[i] = item
        self._tail = (self._tail + 1) % self._maxsize
        self.unfinished_tasks += 1  # one more task for join() to wait on
        self._len += 1
        # BUG FIX: the original only notified when the queue had been empty,
        # which can leave extra blocked consumers waiting forever when several
        # block at once. Notifying on every insert is always safe.
        self.not_empty.notify()
        return i
我们再提出一个需求,如果想在某个任务被处理完成后立刻得到通知,该如何实现?
我们可以将任务与一个Event对象相关联,处理完成后用于通知。
代码如下:
def producer(q):
    """Put (data, event) pairs and block until the consumer signals completion.

    NOTE(review): illustrative fragment — `running`, `data` and `Event`
    (threading.Event) must be defined/imported by the surrounding code.
    """
    while running:
        evt = Event()
        q.put((data, evt))
        # Block here until the consumer has processed this item.
        evt.wait()
def consumer(q):
    """Take (data, event) pairs off the queue and signal each one when done."""
    while True:
        data, evt = q.get()
        # ... process data here ...
        evt.set()
在多线程编程中,通常我们使用锁来对临界区代码进行保护。在一些老的代码中,可能会使用lock.acquire(),lock.release()来显式地获取和释放锁。这种方式可能会忘记调用release,也比较容易出错。
因此,Python通过上下文管理协议对锁进行了封装,我们通过with语句,可以更可靠的对锁进行使用。
提到锁,可能就不得不提死锁和活锁问题。
对于死锁,我们最好只使用一把锁,如果要使用多把锁,就需要死锁避免机制。
防止死锁
方法一:以全局固定顺序加锁。
我们通过上下文管理协议,实现一个按顺序加锁的对象
from contextlib import contextmanager

# Per-thread record of the locks currently held; used to detect out-of-order
# acquisition attempts (the pattern that causes deadlock).
_local = threading.local()

@contextmanager
def acquire(*locks):
    """Acquire `locks` in one global order (sorted by id), releasing on exit.

    Raises RuntimeError when a thread that already holds a lock tries to take
    one that sorts before it — a lock-order violation.
    """
    ordered = sorted(locks, key=id)
    held = getattr(_local, 'acquired', [])
    # Any lock already held must sort strictly before every new lock.
    if held and max(id(lk) for lk in held) >= id(ordered[0]):
        raise RuntimeError('Lock Order Violation')
    held.extend(ordered)
    _local.acquired = held
    try:
        for lk in ordered:
            lk.acquire()
        yield
    finally:
        # Release in reverse order and forget these locks for this thread.
        for lk in reversed(ordered):
            lk.release()
        del held[-len(ordered):]
接着,用我们实现的acquire来测试确认能避免死锁问题
x_lock, y_lock = threading.Lock(), threading.Lock()

def thread1():
    """Loop forever, taking both locks each pass (listed x-then-y)."""
    while True:
        with acquire(x_lock, y_lock):
            print('Thread-1')

def thread2():
    """Same as thread1 but lists the locks in the opposite order."""
    while True:
        with acquire(y_lock, x_lock):
            print('Thread-2')

t1 = threading.Thread(target=thread1)
t2 = threading.Thread(target=thread2)
for worker in (t1, t2):
    worker.start()
t1.join()
t2.join()
我们使用TLS,是为了防止多次调用acquire造成的死锁
下面的代码会抛出"Lock Order Violation"异常
def thread1():
    """Nested acquire() calls: hold x_lock, then ask for y_lock.

    One of the two threads is guaranteed to trip the 'Lock Order Violation'
    check, because its inner lock sorts before the lock it already holds.
    """
    while True:
        with acquire(x_lock):
            with acquire(y_lock):
                print('Thread-1')
                time.sleep(1)

def thread2():
    """Mirror image of thread1: holds y_lock while asking for x_lock."""
    while True:
        with acquire(y_lock):
            with acquire(x_lock):
                print('Thread-2')
                time.sleep(1)
检测死锁并从死锁中恢复是一个很棘手的问题。
一种常见的做法是使用“watchdog”
关于死锁的一个经典问题是“哲学家进餐”问题,通过使用我们实现的acquire方法,5个哲学家能够正常的运行。
线程私有存储(TLS)
下面是一个使用TLS的例子,每个线程里都使用一个新的连接
import threading
from socket import socket, AF_INET, SOCK_STREAM
class LazyConnection:
    """A context manager that lazily opens one socket per thread.

    The socket lives in threading.local(), so each thread entering the
    manager gets its own independent connection.
    """

    def __init__(self, address, family=AF_INET, type=SOCK_STREAM):
        self.address = address
        self.family = family
        self.type = type
        self.local = threading.local()  # per-thread storage for the socket

    def __enter__(self):
        # Each thread may hold at most one open connection at a time.
        if hasattr(self.local, 'sock'):
            raise RuntimeError('Already connected.')
        self.local.sock = socket(self.family, self.type)
        self.local.sock.connect(self.address)
        return self.local.sock

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Close and forget this thread's socket so the manager can be re-entered.
        self.local.sock.close()
        del self.local.sock
from functools import partial

def test(conn):
    """Fetch /index.html over the per-thread connection and report the size."""
    with conn as s:
        s.send(b'GET /index.html HTTP/1.0\r\n')
        s.send(b'Host:www.python.org\r\n')
        s.send(b'\r\n')
        # Read until recv() returns b'' (peer closed), then join the chunks.
        resp = b''.join(iter(partial(s.recv, 8192), b''))
        print('Got {} bytes'.format(len(resp)))
if __name__ == "__main__":
    # Two threads share one LazyConnection object, but each gets its own socket.
    conn = LazyConnection(('www.python.org', 80))
    t1 = threading.Thread(target=test, args=(conn, ))
    t2 = threading.Thread(target=test, args=(conn, ))
    for worker in (t1, t2):
        worker.start()
    t1.join()
    t2.join()
线程池
python的concurrent.futures包里提供了ThreadPoolExecutor对象。
下面用一个echo server的例子来说明其使用方法
from concurrent.futures import ThreadPoolExecutor
from socket import AF_INET, SOCK_STREAM, socket
def echo_client(sock, client_addr):
    """Echo every message received on `sock` back to the peer until it closes."""
    print("Got connection from", client_addr)
    # recv() returns b'' when the peer closes the connection.
    while msg := sock.recv(65536):
        sock.sendall(msg)
    print("Client closed.")
    sock.close()
def echo_server(addr):
    """Accept TCP connections on `addr` forever, serving each from a thread pool."""
    pool = ThreadPoolExecutor(128)  # up to 128 concurrently served clients
    server = socket(AF_INET, SOCK_STREAM)
    server.bind(addr)
    server.listen(5)
    while True:
        client_sock, client_addr = server.accept()
        pool.submit(echo_client, client_sock, client_addr)
if __name__ == "__main__":
    # BUG FIX: the original line was missing the closing parenthesis
    # (`echo_server(('', 15000)`), a syntax error.
    echo_server(('', 15000))
自己实现一个线程池
如果我们想自己实现一个线程池,使用Queue来处理请求。
from queue import Queue
from threading import Thread
from socket import AF_INET, SOCK_STREAM, socket
def echo_client(q):
    """Pool worker: repeatedly take a (socket, address) pair from `q` and echo.

    BUG FIX: the original handled a single connection and then returned, so
    after `nworks` clients the hand-rolled pool had no live workers left and
    the server silently stopped serving. Each worker now loops forever,
    serving one connection at a time.
    """
    while True:
        sock, client_addr = q.get()
        print("Got connection from", client_addr)
        while True:
            msg = sock.recv(65536)
            if not msg:
                break
            sock.sendall(msg)
        print("Client closed.")
        sock.close()
def echo_server(addr, nworks):
    """Listen on `addr`, dispatching accepted connections to `nworks` workers.

    Workers block on the shared queue; the accept loop only enqueues.
    """
    q = Queue()
    for _ in range(nworks):
        worker = Thread(target=echo_client, args=(q,), daemon=True)
        worker.start()
    server = socket(AF_INET, SOCK_STREAM)
    server.bind(addr)
    server.listen(5)
    while True:
        # accept() already returns the (socket, address) tuple we enqueue.
        q.put(server.accept())
if __name__ == "__main__":
    # BUG FIX: the original was missing the closing parenthesis AND the
    # required second argument — echo_server(addr, nworks) takes two
    # parameters. 128 matches the pool size used in the executor example.
    echo_server(('', 15000), 128)
使用ThreadPoolExecutor的好处是,使用submit方法后可以方便地取回执行结果。
下面是代码示例:
from concurrent.futures import ThreadPoolExecutor
# BUG FIX: `import urllib` does NOT make the `urllib.request` submodule
# available; it must be imported explicitly or the calls below raise
# AttributeError at runtime.
import urllib.request

def fetch_url(url):
    """Download `url` and return the raw response body as bytes."""
    u = urllib.request.urlopen(url)
    data = u.read()
    return data

pool = ThreadPoolExecutor(10)
# submit() returns a Future immediately; the downloads run on pool threads.
a = pool.submit(fetch_url, 'http://www.python.org')
b = pool.submit(fetch_url, 'http://www.python.org')
x = a.result()  # result() blocks until the corresponding future completes
y = b.result()
result方法会一直阻塞到结果返回。