import requests
import random
import re
import json
import demjson
from requests.packages.urllib3.exceptions import InsecureRequestWarning
from requests.exceptions import ReadTimeout,ConnectionError,RequestException
# Silence urllib3's InsecureRequestWarning for the whole process; this script
# issues a request with verify=False.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
# Pool of browser User-Agent strings; one is picked at random for each
# page request so successive requests do not all share the same header.
UAPOOLS = [
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.22 Safari/537.36 SE 2.X MetaSr 1.0',
    'Mozilla/5.0 (Windows NT 6.1; rv:49.0) Gecko/20100101 Firefox/49.0',
    'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:21.0) Gecko/20100101 Firefox/21.0',
]
def get_one_page(url, timeout=10):
    """Download *url* using a randomly chosen User-Agent header.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up; passed
            straight through to ``requests.get``.

    Returns:
        The response body as text when the server answers with HTTP 200,
        otherwise ``None`` (non-200 status, or any ``RequestException``
        such as a connection failure or timeout).
    """
    # Pick a different User-Agent per call from the module-level pool.
    headers = {'User-Agent': random.choice(UAPOOLS)}
    try:
        # Without an explicit timeout requests will wait indefinitely on a
        # server that accepts the connection but never responds.
        response = requests.get(url, headers=headers, timeout=timeout)
    except RequestException:
        print('请求ip代理页面出错')
        return None
    if response.status_code == 200:
        return response.text
    return None
def parse_page_index(html):
    """Extract proxy address/port pairs from the listing page HTML.

    Only table rows whose opening tag is exactly ``<tr class="odd">`` are
    considered; within each such row the contents of the first two plain
    ``<td>`` cells are captured as (address, port).

    Args:
        html: Page markup as a string, or ``None``/empty when the download
            failed.

    Returns:
        A dict mapping the first captured cell to the second. If the same
        first cell appears more than once, the last occurrence wins. An
        empty dict is returned when *html* is ``None`` or empty, or when
        nothing matches.
    """
    # The downloader returns None on failure; re.findall would raise
    # TypeError on that, so treat missing markup as "no proxies found".
    if not html:
        return {}
    # re.S lets ".*?" span the newlines between the row tag and its cells.
    ip_pattern = re.compile(
        r'<tr class="odd">.*?<td>(.*?)</td>.*?<td>(.*?)</td>', re.S
    )
    return dict(ip_pattern.findall(html))
def main(url='http://www.xicidaili.com/nt', timeout=5):
    """Scrape the proxy listing and keep the proxies that actually respond.

    Each scraped address/port pair is used as an HTTP proxy for a probe
    request to ``http://httpbin.org/ip``; pairs whose probe returns HTTP 200
    are collected.

    Args:
        url: Listing page to scrape for candidate proxies.
        timeout: Seconds to wait for each probe request before treating the
            proxy as unusable.

    Returns:
        A dict mapping address to port for every proxy that passed the
        probe. Empty when the listing could not be downloaded or no proxy
        worked.
    """
    html = get_one_page(url)
    # The download returns None on failure; there is nothing to parse then.
    if html is None:
        return {}
    candidates = parse_page_index(html)

    ip_dic2 = {}
    for host, port in candidates.items():
        dic = {'http': host + ':' + port}
        try:
            # A dead proxy may accept the connection and never answer, so
            # the probe must be bounded by a timeout.
            response = requests.get(
                "http://httpbin.org/ip",
                proxies=dic,
                verify=False,
                timeout=timeout,
            )
        except RequestException:
            print("不可用")
            continue
        if response.status_code == 200:
            print(dic)
            ip_dic2[host] = port
            print(ip_dic2)
    return ip_dic2
if __name__ == '__main__':
    # Script entry point: collect the working proxies and show the result.
    ip_dic2 = main()
    print(ip_dic2)