【11.3】select+回调+事件循环获取html

 1 #!/usr/bin/env python
 2 # -*- coding:utf-8 -*-
 3 
 4 # 通过非阻塞io实现http请求
 5 import socket
 6 from urllib.parse import urlparse
 7 from selectors import DefaultSelector, EVENT_READ, EVENT_WRITE
 8 
 9 selector = DefaultSelector()
10 
11 urls = ['http://www.baidu.com']
12 stop = False
13 
14 
15 # 使用select完成http请求
class Fetcher:
    """Fetch one HTTP URL over a non-blocking socket using selector callbacks.

    ``get_url`` starts the connection and registers ``connected`` for
    write-readiness. ``connected`` sends the request and switches the
    registration to ``readable``, which accumulates the response until the
    peer closes the connection, then prints it.

    The class relies on three module-level names: ``selector`` (the shared
    selector), ``urls`` (the list of pending URLs) and ``stop`` (the flag that
    ends the event loop).
    """

    @staticmethod
    def _parse_target(url):
        """Split *url* into ``(host, port, request_target)``.

        The port defaults to 80 and the request target to ``/``. Any
        ``;params`` and ``?query`` parts are kept in the request target.
        """
        parts = urlparse(url)
        host = parts.hostname or parts.netloc
        port = parts.port or 80
        target = parts.path or '/'
        if parts.params:
            target = '{};{}'.format(target, parts.params)
        if parts.query:
            target = '{}?{}'.format(target, parts.query)
        return host, port, target

    @staticmethod
    def _extract_body(text):
        """Return everything after the first blank line of an HTTP response.

        Returns an empty string when *text* contains no header/body separator.
        """
        _head, _sep, body = text.partition('\r\n\r\n')
        return body

    def connected(self, key):
        """Send the GET request once the socket is reported writable.

        :param key: the ``SelectorKey`` delivered by the event loop.
        :raises OSError: if sending fails (for example because the connection
            attempt failed); the socket is unregistered and closed first.
        """
        request = "GET {} HTTP/1.1\r\nHost:{}\r\nConnection:close\r\n\r\n".format(
            self.target, self.host)
        try:
            # NOTE(review): a single send() is assumed to accept this small
            # request in full on a freshly connected socket.
            self.client.send(request.encode('utf-8'))
        except OSError:
            # Do not leave a dead socket registered or open.
            selector.unregister(key.fd)
            self.client.close()
            raise
        # Stop watching for writability; wait for the response instead.
        selector.modify(key.fd, EVENT_READ, self.readable)

    def readable(self, key):
        """Read the next chunk of the response; finish up on end of stream.

        When the peer closes the connection the socket is unregistered and
        closed, the response is decoded and printed (full response first, then
        the body alone), this fetcher's URL is removed from ``urls`` and
        ``stop`` is set once ``urls`` is empty.

        :param key: the ``SelectorKey`` delivered by the event loop.
        :raises ValueError: if this fetcher's URL is not present in ``urls``.
        """
        global stop

        try:
            chunk = self.client.recv(1024)
        except BlockingIOError:
            # Nothing to read after all; wait for the next readiness event.
            return

        if chunk:
            self.data += chunk
            return

        # An empty read means the server closed the connection.
        selector.unregister(key.fd)
        self.client.close()

        # Undecodable bytes are replaced rather than aborting the program.
        self.data = self.data.decode('utf-8', errors='replace')
        html_data = self._extract_body(self.data)
        print(self.data)
        print(html_data)

        urls.remove(self.spider_url)
        if not urls:
            stop = True

    def get_url(self, url):
        """Start fetching *url* without blocking on the connection.

        :param url: absolute ``http://`` URL to request.
        """
        self.spider_url = url
        host, port, self.target = self._parse_target(url)
        parsed = urlparse(url)
        # ``host`` keeps the full netloc because it is sent in the Host header.
        self.host = parsed.netloc
        self.path = parsed.path or '/'
        self.data = b""

        # Open the connection in non-blocking mode.
        self.client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.client.setblocking(False)

        try:
            self.client.connect((host, port))
        except BlockingIOError:
            # Expected: the connection attempt is still in progress.
            pass

        # selector.register(fileobj, events, data=None):
        #   fileobj - file descriptor to monitor
        #   events  - events to wait for
        #   data    - here, the callback the event loop invokes
        selector.register(self.client.fileno(), EVENT_WRITE, self.connected)
69 
70 
def loop():
    """Run the event loop: wait for socket events and invoke their callbacks.

    select itself offers no registration/callback mode, so the callback stored
    as each key's ``data`` is invoked here by hand whenever its socket changes
    state.

    The loop ends when the module-level ``stop`` flag is set or when nothing
    is registered on ``selector`` any more.
    """
    # Calling select() with nothing registered would wait indefinitely (and,
    # per the original note, raises an error on Windows), so stop instead.
    while not stop and selector.get_map():
        for key, _mask in selector.select():
            callback = key.data
            callback(key)
81 
82 
if __name__ == '__main__':
    # Start one fetcher per pending URL so that every entry in ``urls`` is
    # eventually removed and the loop's stop condition can be reached.
    # Iterate over a copy because the callbacks remove entries from ``urls``.
    for pending_url in list(urls):
        Fetcher().get_url(pending_url)
    loop()

猜你喜欢

转载自www.cnblogs.com/zydeboke/p/11328584.html