爬虫代理的设置

1. requests 中的设置

import requests

import random

# Test URLs: baidu as a generic target; httpbin's /get echoes the request,
# so the effective (proxied) origin IP can be inspected in the response body.
url ='https://www.baidu.com'
url2 ='https://httpbin.org/get'
# Proxy pool: candidate "ip:port" proxy endpoints.
# NOTE(review): these look like free public proxies — likely stale; verify before use.
proxy_pool =['138.201.223.250:31288', '196.13.208.23:8080', '91.197.132.99:53281']

def get_content(url, proxy_pool):
    """Fetch *url* through a proxy chosen at random from *proxy_pool*.

    Prints the chosen proxy, then the HTTP status code and response body on
    success, or the error details when the request fails. Returns None.

    :param url: target URL to fetch.
    :param proxy_pool: list of "ip:port" proxy addresses.
    """
    # random.choice picks one element directly; the original
    # random.choices(pool)[0] built a one-element list just to unwrap it.
    proxy = random.choice(proxy_pool)
    print('本次使用的代理为:%s' % proxy)
    # Route both http and https traffic through the same HTTP proxy endpoint.
    proxies = {
        'https': 'http://' + proxy,
        'http': 'http://' + proxy
    }

    try:
        # timeout is essential with free proxies: without it a dead proxy
        # makes requests.get block indefinitely.
        res = requests.get(url=url, proxies=proxies, timeout=10)
        print(res.status_code)
        print(res.text)
    except requests.exceptions.RequestException as e:
        # RequestException is the base class: it also covers ProxyError,
        # Timeout and SSLError, not just ConnectionError.
        print('Error:', e.args)

# Demo: fetch httpbin's /get endpoint, which echoes the origin IP so the
# proxy can be verified; the baidu call is left commented out.
get_content(url2,proxy_pool)
# get_content(url,proxy_pool)

2. 在selenium 中的设置

import random
import requests
from selenium import webdriver

# Test URLs: httpbin's /get echoes the origin IP for proxy verification.
url = 'https://www.baidu.com'
url2 = 'http://httpbin.org/get'
# Proxy pool: candidate "ip:port" proxy endpoints.
proxy_pool = ['138.201.223.250:31288', '196.13.208.23:8080', '91.197.132.99:53281']

# Pick one proxy at random for this browser session.
proxy = random.choice(proxy_pool)

chrome_options = webdriver.ChromeOptions()
# Chrome's switch is spelled --proxy-server (hyphens); the underscore form
# --proxy_server is silently ignored, so no proxy would ever be applied.
# NOTE(review): the https:// scheme here declares an HTTPS proxy — if these
# are plain HTTP proxies, http:// is probably intended; confirm.
chrome_options.add_argument('--proxy-server=https://%s' % proxy)
bro = webdriver.Chrome(chrome_options=chrome_options)
bro.get(url)

3. PhantomJS中的设置

from selenium import webdriver


# Test URLs: httpbin's /get echoes the origin IP for proxy verification.
url = 'http://www.baidu.com'
url2 = 'http://httpbin.org/get'
# PhantomJS command-line switches use hyphens: --proxy-type. The underscore
# form --proxy_type is not a recognized switch, so the proxy type setting
# would never take effect.
# NOTE(review): PhantomJS documents --proxy-type values as http|socks5|none;
# "https" may not be accepted — confirm against the PhantomJS version used.
service_args = [
    '--proxy=196.13.208.23:8080',
    '--proxy-type=https'
]
# NOTE(review): PhantomJS support was removed from Selenium 3.8+; with modern
# Selenium, use headless Chrome/Firefox instead.
bro = webdriver.PhantomJS(executable_path=r'D:\phantomjs\bin\phantomjs.exe', service_args=service_args)

bro.get(url)

# Dump the rendered page source to verify the fetch went through the proxy.
print(bro.page_source)

待续!

猜你喜欢

转载自www.cnblogs.com/knighterrant/p/10798366.html
今日推荐