# -*- coding: utf-8 -*-
import requests
import random
import re
import json
import demjson # Python 对象编码成 JSON 字符串
from requests.packages.urllib3.exceptions import InsecureRequestWarning #进行GET时,关闭证书验证
from requests.exceptions import ReadTimeout,ConnectionError,RequestException #异常链接模块
import threading #多线程模块
# Silence urllib3's InsecureRequestWarning, which would otherwise be emitted
# for requests sent with certificate verification turned off (verify=False).
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
# Pool of User-Agent header values; one entry is picked at random per request.
UAPOOLS = [
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.22 Safari/537.36 SE 2.X MetaSr 1.0',
    'Mozilla/5.0 (Windows NT 6.1; rv:49.0) Gecko/20100101 Firefox/49.0',
    'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:21.0) Gecko/20100101 Firefox/21.0',
]
def get_one_page(url, timeout=10):
    """Download ``url`` with a randomly chosen User-Agent header.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        The response body as text when the server answers with HTTP 200;
        ``None`` for any other status code or when the request raises a
        ``RequestException`` (including timeouts).
    """
    # Pick a different User-Agent per call from the module-level pool.
    headers = {'User-Agent': random.choice(UAPOOLS)}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except RequestException:
        print('请求ip代理页面出错')
        return None
    if response.status_code == 200:
        return response.text
    # Any non-200 status is treated as "no page".
    return None
def parse_page_index(html):
    """Extract proxy IP/port pairs from the HTML of a proxy listing page.

    Args:
        html: Page source as a string. May be ``None`` or empty when the
            download failed.

    Returns:
        A dict mapping each IP string to its port string, taken from the
        ``<td data-title="IP">`` / ``<td data-title="PORT">`` cells. The dict
        is empty when ``html`` is falsy or contains no matching cells. If the
        same IP appears more than once, the last port seen wins.
    """
    # A failed download yields None; re.findall would raise TypeError on it.
    if not html:
        return {}
    # re.S lets ".*?" span the newlines between the IP cell and the PORT cell.
    ip_pattern = re.compile(
        r'<td data-title="IP">(.*?)</td>.*?<td data-title="PORT">(.*?)</td>',
        re.S,
    )
    return dict(ip_pattern.findall(html))
def main(timeout=10):
    """Scrape the kuaidaili free proxy list and keep the proxies that respond.

    Each scraped ``ip:port`` is used as an HTTP proxy for a request to
    ``http://httpbin.org/ip``; a proxy is kept when that request returns
    HTTP 200.

    Args:
        timeout: Seconds to wait for each proxy check. Dead proxies would
            otherwise stall the loop indefinitely.

    Returns:
        A dict mapping IP string to port string for every proxy that passed
        the check. Empty when the listing page could not be downloaded or no
        proxy worked.
    """
    url = 'https://www.kuaidaili.com/free/inha/'  # proxy listing page
    html = get_one_page(url)
    if html is None:
        # Download failed; there is nothing to parse or test.
        return {}
    page = parse_page_index(html)
    print(page)
    ip_dic2 = {}
    for host, port in page.items():
        proxies = {'http': host + ':' + port}
        try:
            response = requests.get(
                "http://httpbin.org/ip",
                proxies=proxies,
                verify=False,
                timeout=timeout,
            )
        except RequestException:
            # Unreachable or timed-out proxy: report it and move on.
            print("不可用")
            continue
        if response.status_code == 200:
            print(proxies)
            ip_dic2[host] = port
            print(ip_dic2)
    return ip_dic2
if __name__ == '__main__':
    # Script entry point: collect the working proxies and print the result.
    ip_dic2 = main()
    print(ip_dic2)
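# Proxy IP scraper, kuaidaili (快代理) version.
# Reposted from blog.csdn.net/qq_15907907/article/details/80388557
# (Blog page navigation residue: 猜你喜欢 "You may also like" / 今日推荐 "Today's picks" / 周排行 "Weekly ranking")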