二、爬虫辅助工具 selenium

一、应用场景

  1. 页面点击拖动,加载内容
  2. 元素拖动,验证码破解
  3. 执行js,数据获取
  4. 模拟登录
  5. 参数无法获取

二、准备

  1. 浏览器:chrome
  2. 驱动包:根据本机 chrome 浏览器的版本,到 http://chromedriver.storage.googleapis.com/index.html 下载与之对应版本的 chromedriver 驱动
  3. Java 项目所需的 webdriver 依赖 jar 包(见下文的 Maven 依赖)

三、项目

(一)添加依赖

		<!-- Selenium artifacts used by the Java examples in this article; all three are pinned to 3.141.59. -->
		<!-- selenium-java: supplies the org.openqa.selenium classes the examples import (presumably; confirm against the artifact). -->
		<dependency>
			<groupId>org.seleniumhq.selenium</groupId>
			<artifactId>selenium-java</artifactId>
			<version>3.141.59</version>
		</dependency>
		
		<!-- selenium-chrome-driver: presumably provides ChromeDriver / ChromeOptions. -->
		<!-- NOTE(review): this may already be pulled in transitively by selenium-java; confirm before keeping both. -->
		<dependency>
			<groupId>org.seleniumhq.selenium</groupId>
			<artifactId>selenium-chrome-driver</artifactId>
			<version>3.141.59</version>
		</dependency>
		
		<!-- selenium-server: NOTE(review): the examples below only drive a local browser; verify this artifact is actually needed. -->
		<dependency>
			<groupId>org.seleniumhq.selenium</groupId>
			<artifactId>selenium-server</artifactId>
			<version>3.141.59</version>
		</dependency>

(二)模拟百度登录

package org.pc.demo;

import org.openqa.selenium.By;
import org.openqa.selenium.Cookie;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;

import java.util.Scanner;
import java.util.Set;

/**
 * Demo: drives a local Chrome through the Baidu passport login form with Selenium and
 * prints the cookies the browser holds after the form has been submitted.
 *
 * <p>The captcha is not solved automatically; the operator reads it from the browser
 * window and types it on standard input.
 *
 * @author 咸鱼
 * @date 2018/12/21 21:00
 */
public class SeleniumTest {
    /** Cookies captured by the most recent {@link #login()} call. */
    private static Set<Cookie> cookies;

    public static void main(String[] args) {
        login();
    }

    /**
     * Opens the login page, fills in account, password and the manually entered captcha,
     * submits the form, then stores and prints every cookie of the browser session.
     *
     * <p>The browser is always shut down on exit, even when a page element cannot be
     * found, so no Chrome / chromedriver process is left behind.
     */
    public static void login() {
        // Location of the chromedriver executable.
        System.setProperty("webdriver.chrome.driver", "E:\\demo\\crawler\\chromedriver.exe");
        ChromeOptions options = new ChromeOptions();
        // Location of the Chrome browser binary.
        options.setBinary("C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe");
        WebDriver driver = new ChromeDriver(options);
        try {
            // Open the Baidu login page.
            driver.get("https://passport.baidu.com/v2/?login&tpl=mn");
            // Switch to the user-name/password login form.
            driver.findElement(By.id("TANGRAM__PSP_3__footerULoginBtn")).click();
            // Account.
            replaceText(driver, "TANGRAM__PSP_3__userName", "***");
            // Password.
            replaceText(driver, "TANGRAM__PSP_3__password", "***");
            // The captcha is typed in by hand. The Scanner is intentionally not closed:
            // closing it would also close System.in for the rest of the JVM.
            Scanner console = new Scanner(System.in);
            String captcha = console.nextLine();
            replaceText(driver, "TANGRAM__PSP_3__verifyCode", captcha);
            driver.findElement(By.id("TANGRAM__PSP_3__submit")).click();
            // Collect the cookies; a follow-up crawler can put them into its request headers.
            // NOTE(review): the cookies are read immediately after the click, without waiting
            // for the login request to finish - confirm this is early enough/late enough.
            cookies = driver.manage().getCookies();
            for (Cookie cookie : cookies) {
                System.out.println(cookie.getName() + ":" + cookie.getValue());
            }
        } finally {
            driver.quit();
        }
    }

    /** Clears the input element with the given id and types {@code text} into it. */
    private static void replaceText(WebDriver driver, String elementId, String text) {
        driver.findElement(By.id(elementId)).clear();
        driver.findElement(By.id(elementId)).sendKeys(text);
    }
}

(三)用 selenium 模拟登录,破解滑动验证码

package org.pc.demo;

import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Base64;
import java.util.List;
import java.util.Random;
import javax.imageio.ImageIO;
import org.apache.http.HttpEntity;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.openqa.selenium.By;
import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.Point;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.openqa.selenium.interactions.Actions;

import sun.misc.BASE64Decoder;

/**
 * Demo that tries to pass the Geetest slider captcha on the Geetest demo page by driving
 * Chrome through Selenium: it exports two captcha canvases as PNG files, compares them
 * pixel by pixel to find the horizontal drag distance, and then drags the slider along a
 * generated track.
 *
 * <p>Author's note: this is the legacy slider algorithm tested against the new version of
 * the Geetest site, and it fails there. Integrate a captcha-solving service if this is
 * really needed.
 */
public class JiYan {

    /** Location of the chromedriver executable. */
    private static final String CHROME_DRIVER_PATH = "E:\\demo\\crawler\\chromedriver.exe";
    /** Location of the Chrome browser binary. */
    private static final String CHROME_BINARY_PATH =
            "C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe";
    /** File that receives the first exported canvas (class {@code geetest_canvas_fullbg}). */
    private static final String FULL_IMAGE_PATH = "E:\\a1.png";
    /** File that receives the second exported canvas (class {@code geetest_canvas_bg}). */
    private static final String GAP_IMAGE_PATH = "E:\\a2.png";
    /** Two pixels count as different when any colour channel differs by more than this. */
    private static final int CHANNEL_DIFF_THRESHOLD = 80;
    /** Upper bounds of the area compared by {@link #getDiffLocation}. */
    private static final int MAX_SCAN_WIDTH = 260;
    private static final int MAX_SCAN_HEIGHT = 116;

    public static void main(String[] args) throws Exception {
        run1();
    }

    /**
     * Decodes a Base64 image (optionally prefixed with {@code data:image/png;base64,}) and
     * writes the bytes to {@code imgPath}, replacing any existing file.
     *
     * <p>Failures are reported on stderr and otherwise ignored, as before. The payload is
     * decoded before the file is opened, so a malformed payload no longer leaves an empty
     * file behind, and the stream is closed even when the write fails.
     *
     * @param base64  Base64 text or PNG data URL
     * @param imgPath destination file path
     */
    public static void decodeBase64ToImage(String base64, String imgPath) {
        String payload = base64.replace("data:image/png;base64,", "");
        try {
            // The MIME decoder skips line breaks and other non-alphabet characters, which
            // keeps the tolerant behaviour of the JDK-internal decoder used previously.
            byte[] imageBytes = Base64.getMimeDecoder().decode(payload);
            // FileOutputStream truncates an existing file, so no explicit delete is needed.
            try (FileOutputStream out = new FileOutputStream(imgPath)) {
                out.write(imageBytes);
            }
        } catch (IOException | IllegalArgumentException e) {
            e.printStackTrace();
        }
    }

    /**
     * Issues a GET request with a desktop Chrome User-Agent and prints the status code.
     * The response body is read and discarded because the method returns nothing.
     *
     * <p>Both the client and the response are closed on every path; I/O failures are
     * printed to stderr.
     *
     * @param url address to request
     * @throws Exception kept for compatibility with existing callers
     */
    public static void doGet(String url) throws Exception {
        HttpGet httpGet = new HttpGet(url);
        httpGet.addHeader("User-Agent",
                "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36");
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpGet)) {
            HttpEntity entity = response.getEntity();
            System.out.println("doGet" + response.getStatusLine().getStatusCode());
            // Drain the body so the connection is fully consumed.
            EntityUtils.toString(entity, "UTF-8");
        } catch (ClientProtocolException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Tells whether the pixels at ({@code x}, {@code y}) of both images are close enough:
     * no red, green or blue channel may differ by more than {@link #CHANNEL_DIFF_THRESHOLD}.
     */
    private static boolean isSimilar(BufferedImage image1, BufferedImage image2, int x, int y) {
        int[] rgb1 = getRGB(image1.getRGB(x, y));
        int[] rgb2 = getRGB(image2.getRGB(x, y));
        for (int channel = 0; channel < 3; channel++) {
            if (Math.abs(rgb1[channel] - rgb2[channel]) > CHANNEL_DIFF_THRESHOLD) {
                return false;
            }
        }
        return true;
    }

    /** Splits a packed RGB pixel into {@code {red, green, blue}}; the alpha byte is ignored. */
    private static int[] getRGB(int pixel) {
        int[] rgb = new int[3];
        rgb[0] = (pixel & 0xff0000) >> 16;
        rgb[1] = (pixel & 0xff00) >> 8;
        rgb[2] = (pixel & 0xff);
        return rgb;
    }

    /**
     * Scans both images column by column and returns the x coordinate of the first column
     * that contains a clearly different pixel, or 0 when no such pixel exists.
     *
     * <p>The scan covers at most 260x116 pixels, as before, but is additionally clamped
     * to the size of the smaller image so that it can never index outside either image.
     */
    private static int getDiffLocation(BufferedImage image1, BufferedImage image2) {
        int width = Math.min(MAX_SCAN_WIDTH, Math.min(image1.getWidth(), image2.getWidth()));
        int height = Math.min(MAX_SCAN_HEIGHT, Math.min(image1.getHeight(), image2.getHeight()));
        for (int x = 0; x < width; x++) {
            for (int y = 0; y < height; y++) {
                if (!isSimilar(image1, image2, x, y)) {
                    return x;
                }
            }
        }
        return 0;
    }

    /**
     * Runs the whole demo: opens the Geetest demo page, triggers the slider captcha,
     * exports the two canvases, derives the drag distance and drags the slider.
     *
     * <p>The browser is shut down when the method ends, whether it succeeds or fails.
     *
     * @throws Exception on interruption, image I/O failure or any Selenium error
     */
    public static void run1() throws Exception {
        System.setProperty("webdriver.chrome.driver", CHROME_DRIVER_PATH);
        ChromeOptions options = new ChromeOptions();
        options.setBinary(CHROME_BINARY_PATH);
        WebDriver driver = new ChromeDriver(options);
        try {
            driver.get("http://www.geetest.com/type/");
            // Crude fixed delays instead of explicit waits.
            Thread.sleep(1000);
            driver.findElement(By.xpath("//div[@class='products-content']/ul/li[3]")).click();
            Thread.sleep(1000);
            driver.findElement(By.xpath("//div[@class='products-content']/ul/li[2]")).click();
            Thread.sleep(1000);
            driver.findElement(By.xpath("//div[@class='geetest_radar_btn']/div[@class='geetest_radar_tip']/span")).click();
            System.out.println(driver.findElement(By.xpath("//div[@class='geetest_radar_tip']/span")).getText());
            Thread.sleep(1000);

            // Export both captcha canvases through JavaScript (canvas -> Base64 data URL).
            JavascriptExecutor js = (JavascriptExecutor) driver;
            decodeBase64ToImage(
                    canvasToDataUrl(js, "geetest_canvas_fullbg geetest_fade geetest_absolute"),
                    FULL_IMAGE_PATH);
            decodeBase64ToImage(
                    canvasToDataUrl(js, "geetest_canvas_bg geetest_absolute"),
                    GAP_IMAGE_PATH);

            // Work out how far the slider has to travel.
            Random random = new Random();
            // Reading from File lets ImageIO open and close the stream itself.
            BufferedImage image1 = ImageIO.read(new File(FULL_IMAGE_PATH));
            BufferedImage image2 = ImageIO.read(new File(GAP_IMAGE_PATH));
            if (image1 == null || image2 == null) {
                throw new IOException("Exported captcha image could not be decoded");
            }
            // Fixed empirical corrections applied to the detected column.
            int ranAddLoc = -2;
            int loc = (getDiffLocation(image1, image2) - 5) + ranAddLoc;
            System.out.println(loc);
            List<Integer> trackList = getTrackList3(loc);
            Thread.sleep(200);

            // The round slider handle.
            WebElement slider = driver.findElement(By.xpath("//div[@class='geetest_slider_button']"));

            // Press and hold the handle.
            Actions actions = new Actions(driver);
            actions.clickAndHold(slider).perform();
            System.out.println(slider.getLocation().toString());
            Thread.sleep(500 + random.nextInt(500));
            System.out.println(slider.getLocation().toString());

            // Queue the drag. The button is already held, so only relative moves are
            // needed; the last fifth of the steps gets longer pauses.
            int slowFrom = trackList.size() * 4 / 5;
            for (int i = 0; i < trackList.size(); i++) {
                actions.moveByOffset(trackList.get(i), 0);
                if (i < slowFrom) {
                    actions.pause(random.nextInt(20) + 100);
                } else {
                    actions.pause(random.nextInt(20) + 200);
                }
            }
            // Execute the drag before reporting the position, so "before release" really
            // is the position after the moves and before the button is let go.
            actions.perform();

            System.out.println("释放前" + slider.getLocation().toString());
            actions.release(slider).perform();
            System.out.println("释放后" + slider.getLocation().toString());
        } finally {
            driver.quit();
        }
    }

    /** Returns the PNG data URL of the first canvas carrying exactly the given class list. */
    private static String canvasToDataUrl(JavascriptExecutor js, String classNames) {
        String script = "var oCanvas =document.getElementsByClassName('" + classNames + "')[0];"
                + "return oCanvas.toDataURL();";
        return (String) js.executeScript(script);
    }

    /**
     * Track variant 1: splits {@code loc} into five steps of fixed proportions
     * (4/15, 2/15, 4/15, 5/24 and the remainder). The steps always add up to {@code loc}.
     *
     * @param loc total horizontal distance
     * @return the five step sizes
     */
    public static List<Integer> getTrackList(int loc) {
        List<Integer> list = new ArrayList<>();
        list.add(loc * 4 / 15);
        list.add(loc * 2 / 15);
        list.add(loc * 4 / 15);
        list.add(loc * 5 / 24);
        list.add(loc - (loc * 4 / 15 + loc * 2 / 15 + loc * 4 / 15 + loc * 5 / 24));
        return list;
    }

    /**
     * Track variant 2 (debug only): simulates the accelerate-then-brake motion for a fixed
     * distance of 100 and prints each step and the running position. Nothing is returned.
     */
    public void getTrackList2() {
        int loc = 100;
        double mid = loc * 4.0 / 5;
        // Time slice per step.
        double t = 0.2;
        // Current velocity.
        double v = 0.0;
        double current = 0.0;
        while (current < loc) {
            // Accelerate over the first 4/5 of the way, brake afterwards.
            double a = current < mid ? loc * 1.0 : -loc * 1.5;
            double v0 = v;
            // v = v0 + a*t
            v = v0 + a * t;
            // x = v0*t + 1/2*a*t^2; 0.5 is required here because the int expression
            // 1 / 2 evaluates to 0 and would drop the acceleration term.
            double move = v0 * t + 0.5 * a * t * t;
            current += move;
            System.out.println(move);
            System.out.println(" " + current);
        }
    }

    /**
     * Track variant 3: whole-pixel steps that accelerate until 4/5 of {@code loc} has been
     * covered and brake afterwards, followed by one correction step so that the steps add
     * up to exactly {@code loc}. The correction step is negative when the simulated motion
     * overshoots.
     *
     * <p>For {@code loc <= 0} the result is the single step {@code loc}.
     *
     * @param loc total horizontal distance in pixels
     * @return step sizes whose sum equals {@code loc}
     */
    public static List<Integer> getTrackList3(int loc) {
        List<Integer> list = new ArrayList<>();
        int mid = loc * 4 / 5;
        // Time slice per step.
        double t = 0.2;
        // Current velocity.
        double v = 0;
        int current = 0;
        while (current < loc) {
            boolean braking = current >= mid;
            double a = braking ? -loc * 1.5 : loc * 1.0;
            double v0 = v;
            // v = v0 + a*t
            v = v0 + a * t;
            // x = v0*t + 1/2*a*t^2; 0.5 is required here because the int expression
            // 1 / 2 evaluates to 0 and would drop the acceleration term.
            int move = (int) (v0 * t + 0.5 * a * t * t);
            if (braking && move <= 0) {
                // Braking has stalled short of the target (only happens for tiny
                // distances). Stop here and let the correction step cover the rest
                // instead of drifting backwards.
                break;
            }
            current += move;
            list.add(move);
        }
        list.add(loc - current);
        return list;
    }
}

猜你喜欢

转载自blog.csdn.net/panchang199266/article/details/85240257