通用httpclient生成方式

 在做爬虫的时候,如何生成一个靠谱可用的httpclient对象是非常关键的。在踩了无数的坑之后,总结出一个较为完善的httpclient生成方式。

可以解决以下问题:

1、设置代理问题

2、设置默认的cookiestore对象,用来保存请求中的cookie。以便进行深层次访问。

3、请求失败时的重试策略问题

4、默认useragent的问题

5、https及自签名证书的验证问题

/**
     * Creates a general-purpose {@link HttpClientBuilder} backed by a pooling connection manager.
     *
     * <p>The builder is configured with the given retry handler and user agent, an optional
     * cookie store, an optional proxy, and an SSL setup that accepts every certificate and
     * every host name.
     *
     * <p><b>Security note:</b> certificate and host-name verification are disabled on purpose
     * so that sites with self-signed or otherwise invalid certificates can still be fetched.
     * Do not use the resulting client where the identity of the remote server matters.
     *
     * @param isNeedProxy whether requests should be routed through {@code host}; the proxy is
     *                    only applied when this is {@code true} AND {@code host} is non-null
     * @param store       cookie store used to keep cookies between requests; skipped when
     *                    {@code null}
     * @param host        proxy address; ignored unless {@code isNeedProxy} is {@code true}
     * @param handler     retry handler passed to {@code setRetryHandler}
     * @param userAgent   value passed to {@code setUserAgent}
     * @return the configured builder; callers invoke {@code build()} to obtain the client
     */
    public static HttpClientBuilder getInstanceClientBuilder(boolean isNeedProxy, CookieStore store, HttpHost host, HttpRequestRetryHandler handler, String userAgent) {
        final long connectionTtlSeconds = 6000;

        // Trust all certificates to work around https certificate problems.
        SSLContext sslContext = null;
        try {
            sslContext = SSLContextBuilder.create()
                    .loadTrustMaterial(null, (x509Certificates, s) -> true)
                    .build();
        } catch (Exception e) {
            // Best effort: fall back to the default connection manager (default SSL handling) below.
            e.printStackTrace();
        }

        // The time-to-live has to be given to the connection manager itself: a value set through
        // HttpClientBuilder#setConnectionTimeToLive is not applied once an explicit connection
        // manager is installed on the builder.
        PoolingHttpClientConnectionManager manager;
        if (sslContext != null) {
            // Offer several TLS versions to avoid occasional handshake interruptions;
            // the host-name verifier accepts every host.
            ConnectionSocketFactory sslSocketFactory = new SSLConnectionSocketFactory(
                    sslContext, new String[]{"TLSv1", "TLSv1.1", "TLSv1.2"}, null, (s, sslSession) -> true);
            Registry<ConnectionSocketFactory> registry = RegistryBuilder.<ConnectionSocketFactory>create()
                    .register("https", sslSocketFactory)
                    .register("http", new PlainConnectionSocketFactory())
                    .build();
            // null arguments select the library defaults for the connection factory,
            // scheme-port resolver and DNS resolver.
            manager = new PoolingHttpClientConnectionManager(
                    registry, null, null, null, connectionTtlSeconds, TimeUnit.SECONDS);
        } else {
            manager = new PoolingHttpClientConnectionManager(connectionTtlSeconds, TimeUnit.SECONDS);
        }
        manager.setMaxTotal(150);
        // NOTE(review): the per-route default (200) is larger than the pool total (150),
        // so the total is the limit that actually applies — confirm the intended values.
        manager.setDefaultMaxPerRoute(200);

        HttpClientBuilder builder = HttpClients.custom().setRetryHandler(handler)
                // Kept for callers that later swap out the connection manager on the builder.
                .setConnectionTimeToLive(connectionTtlSeconds, TimeUnit.SECONDS)
                .setUserAgent(userAgent);
        if (store != null) {
            builder.setDefaultCookieStore(store);
        }
        if (isNeedProxy && host != null) {
            // Route every request through the given proxy, e.g. new HttpHost("127.0.0.1", 1080).
            builder = builder.setRoutePlanner(new DefaultProxyRoutePlanner(host));
        }
        builder.setConnectionManager(manager); // pooled connections
        // Follow 302/307 redirects for every request; per the original author, HttpClient
        // otherwise only follows them automatically for GET requests.
        builder.setRedirectStrategy(new AllowAllRedirectStrategy());
        return builder;
    }

猜你喜欢

转载自mercymessi.iteye.com/blog/2348224
今日推荐