使用 Selenium 模拟浏览器 + 鼠标滚轮滚动, 爬取魔方公寓的租房评论信息

魔方公寓的评论信息位于一个单独的 div(弹窗)中, 必须模拟鼠标滚轮滚动才能加载出更多评论; 而且每次只能取到当前页面上可见的内容, 尚未显示出来的元素取到的文本为空. 因此只能在每次滚动之后立即取值并保存.

代码:

import time
import win32api, win32con
from win32api import GetSystemMetrics
from selenium.webdriver.common.action_chains import ActionChains
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from win32con import SM_CXSCREEN, SM_CYSCREEN

# Scrape the review texts of one 52mf.com.cn shop page.
#
# The reviews are shown in a modal that only exposes the text of the entries
# currently visible, so the script scrolls the modal with OS-level mouse-wheel
# events and harvests the visible <p> texts after every step.

# Number of wheel steps to send, and the wheel delta per step
# (a negative delta scrolls towards the user, i.e. down the list).
SCROLL_STEPS = 260
WHEEL_DELTA = -448

options = Options()
# Headless mode must stay off: the wheel events below are injected through
# the Windows input queue and need a real, visible browser window.
driver = webdriver.Chrome(executable_path=r"D:\tools\chromedriver.exe", options=options)
url = "https://www.52mf.com.cn/shopDetails/8069"
# NOTE(review): this dict is never referenced below; it is kept only so the
# module-level name still exists.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36",
    "Referer":"https://www.52mf.com.cn/"
}
try:
    driver.get(url=url)
    # Nudge the page down a little before clicking the "all comments" entry.
    for _ in range(3):
        driver.execute_script("window.scrollBy(0,200)")
        time.sleep(0.2)
    all_ping = driver.find_element_by_css_selector("#anchor6 > div.all-comment.cursor-pointer")
    all_ping.click()
    time.sleep(1)

    # Put the real OS cursor at the centre of the screen so the wheel events
    # are delivered to whatever is under it. The original built an
    # ActionChains move but never called perform(), so nothing moved; a
    # Selenium pointer move would not reposition the OS cursor anyway.
    # Assumes the comment modal covers the screen centre once the window is
    # maximized -- TODO confirm against the live page.
    x = GetSystemMetrics(SM_CXSCREEN)
    y = GetSystemMetrics(SM_CYSCREEN)
    driver.maximize_window()
    win32api.SetCursorPos((x // 2, y // 2))

    # Consecutive scroll steps expose overlapping entries, so remember what
    # was already written. Trade-off: two different reviews with exactly the
    # same text are stored once.
    seen = set()
    # Open the output once instead of once per comment.
    with open("ping.txt", "a", encoding="utf8") as fp:
        for _ in range(SCROLL_STEPS):
            # Simulate one mouse-wheel step.
            win32api.mouse_event(win32con.MOUSEEVENTF_WHEEL, 0, 0, WHEEL_DELTA)
            time.sleep(0.2)
            ping_list = driver.find_elements_by_xpath('//div[@id="modal-root"]//ul/li/p')
            time.sleep(0.2)
            for ping in ping_list:
                ping_str = ping.text.strip()
                # Entries that are not currently displayed yield an empty
                # string; skip those and anything already saved.
                if not ping_str or ping_str in seen:
                    continue
                seen.add(ping_str)
                fp.write(ping_str + "\n")
            time.sleep(0.1)
finally:
    # quit() also shuts down the chromedriver process; close() would only
    # close the window and would be skipped entirely on an exception.
    driver.quit()

猜你喜欢

转载自blog.csdn.net/qq_42827960/article/details/84994106