1. 首先使用用户名和密码登录,获取cookie
import json
import time
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.common.exceptions import NoSuchElementException
class Crawler():
    """Log in to Baidu Zhidao with a username/password and save the session cookies."""

    @staticmethod
    def gather(username='用户名', password='密码', cookie_file='vcyber.json'):
        """Drive Chrome through the Baidu Zhidao login and dump the cookies as JSON.

        The flow opens https://zhidao.baidu.com/, clicks the login button,
        switches from the QR-code panel to the username/password form when the
        switch link can be clicked, submits the credentials, and, if the
        "send SMS code" button can be clicked, prompts on stdin for the
        verification code and submits it. The cookies reported by the driver
        are then printed and written to ``cookie_file``.

        Args:
            username: Text typed into the ``userName`` field.
            password: Text typed into the ``password`` field.
            cookie_file: Path of the JSON file the cookies are written to.

        Returns:
            None. The browser is closed before returning, also on failure.

        Raises:
            selenium.common.exceptions.WebDriverException: If a mandatory
                element (login button, credential fields, submit button)
                cannot be located or used.
        """
        # Local import: the two optional steps below must tolerate any
        # WebDriver failure (missing element, not clickable, ...) without
        # also swallowing KeyboardInterrupt/SystemExit like a bare except.
        from selenium.common.exceptions import WebDriverException

        chrome_options = Options()
        chrome_options.add_argument("window-size=1024,768")
        # Raw string: the backslashes in the Windows path are literal.
        driver = webdriver.Chrome(
            chrome_options=chrome_options,
            executable_path=r'C:\devtool\Anaconda\Scripts\chromedriver',
        )
        try:
            # Open Baidu Zhidao, clearing all cookies before the page load.
            logurl = 'https://zhidao.baidu.com/'
            driver.delete_all_cookies()
            driver.get(logurl)
            # Print the cookies as they are before logging in.
            print(driver.get_cookies())

            # Click the login button.
            driver.find_element_by_xpath('//*[@id="userbar-login"]').click()
            time.sleep(2)

            # The dialog may open on the QR-code panel first; switch to the
            # username/password form when the switch link is available.
            try:
                driver.find_element_by_xpath(
                    '//*[@id="TANGRAM__PSP_10__footerULoginBtn"]'
                ).click()
            except WebDriverException:
                # Link not available: carry on with the form as shown.
                pass

            # Fill in the username and password, then submit.
            user = driver.find_element_by_name('userName')
            user.clear()
            pwd = driver.find_element_by_name('password')
            pwd.clear()
            submit = driver.find_element_by_id('TANGRAM__PSP_10__submit')
            time.sleep(2)
            user.send_keys(username)
            pwd.send_keys(password)
            time.sleep(1)
            submit.click()
            time.sleep(1)

            # Optional SMS verification: only runs when the "send code"
            # button can be clicked.
            try:
                driver.find_element_by_xpath(
                    '//*[@id="TANGRAM__28__button_send_mobile"]'
                ).click()
                time.sleep(10)
                # Read the verification code interactively from the shell.
                message = input("Tell me the captcha: ")
                captcha = driver.find_element_by_xpath(
                    '//*[@id="TANGRAM__28__input_label_vcode"]'
                )
                time.sleep(1)
                captcha.send_keys(message)
                time.sleep(1)
                # Submit the verification code.
                driver.find_element_by_xpath(
                    '//*[@id="TANGRAM__28__button_submit"]'
                ).click()
                time.sleep(3)
            except WebDriverException:
                # No SMS verification step could be completed.
                time.sleep(1)

            # Collect the cookies and persist them as JSON.
            cookie = driver.get_cookies()
            print(cookie)
            with open(cookie_file, 'w', encoding='utf-8') as f:
                json.dump(cookie, f)
            time.sleep(30)
        finally:
            # Always release the browser and the chromedriver process.
            driver.quit()


Crawler.gather()
2. 获取cookie后,可以不用输入密码直接登录
import json
import time
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.common.exceptions import NoSuchElementException
class Crawler():
    """Open Baidu Zhidao with cookies loaded from a JSON file instead of a password login."""

    @staticmethod
    def gather(cookie_file='vcyber.json'):
        """Start Chrome, load cookies from ``cookie_file`` and refresh the page.

        Opens https://zhidao.baidu.com/, adds every cookie found in the JSON
        file to the browser session, and reloads the page so the request is
        sent with those cookies.

        Args:
            cookie_file: Path of a JSON file holding a list of cookie dicts.

        Returns:
            None. The browser is not closed by this function.

        Raises:
            OSError: If ``cookie_file`` cannot be opened.
            json.JSONDecodeError: If the file does not contain valid JSON.
        """
        chrome_options = Options()
        chrome_options.add_argument("window-size=1024,768")
        # Raw string: the backslashes in the Windows path are literal.
        driver = webdriver.Chrome(
            chrome_options=chrome_options,
            executable_path=r'C:\devtool\Anaconda\Scripts\chromedriver',
        )

        # Open Baidu Zhidao, clearing all cookies before the page load.
        logurl = 'https://zhidao.baidu.com/'
        driver.delete_all_cookies()
        driver.get(logurl)

        # Read the saved cookies; the context manager closes the file handle.
        with open(cookie_file, encoding='utf-8') as f:
            cookies = json.load(f)
        for c in cookies:
            driver.add_cookie(c)

        # Refresh so the page is requested again with the cookies in place.
        driver.refresh()


Crawler.gather()