Java HttpClient爬虫请求

**本项目采用 spring-boot 构建,为 maven 工程。**

添加依赖

pom文件

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>org.test</groupId>
  <artifactId>testDome</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <dependencies>
    <!-- Apache HttpClient 4.x: provides CloseableHttpClient, HttpGet, HttpPost, EntityUtils. -->
    <dependency>
      <groupId>org.apache.httpcomponents</groupId>
      <artifactId>httpclient</artifactId>
      <version>4.5.5</version>
    </dependency>
    <!--
      fastjson: JSON parsing library.
      Raised from 1.2.47, which has publicly documented autoType deserialization
      vulnerabilities; 1.2.83 is the 1.x release recommended by the vendor advisory.
    -->
    <dependency>
      <groupId>com.alibaba</groupId>
      <artifactId>fastjson</artifactId>
      <version>1.2.83</version>
    </dependency>
  </dependencies>
</project>

GET 无参形式

package testDemo;

import org.apache.http.Header;
import org.apache.http.HttpHost;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

/**
 * Minimal example of an HTTP GET request without parameters, using Apache HttpClient.
 *
 * <p>Requests {@code http://www.baidu.com} and, when the status code is 200, prints the
 * response body and its length to standard output.
 */
public class DoGET {

    /**
     * Sends a single GET request and prints the response body on a 200 status.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if executing the request, reading the body, or closing a resource fails
     */
    public static void main(String[] args) throws Exception {

        // Optional: build a client that does not follow redirects.
//        RequestConfig config = RequestConfig.custom().setRedirectsEnabled(false).build();
//        CloseableHttpClient httpClient = HttpClients.custom().setDefaultRequestConfig(config).build();

        // Optional: route the request through a proxy (proxyHost = proxy IP, proxyPort = port).
//        int proxyPort = 8000;
//        String proxyHost = "192.10.2.125";
//        HttpHost proxy = new HttpHost(proxyHost, proxyPort, "HTTP");

        // Create the HttpClient instance.
        CloseableHttpClient httpclient = HttpClients.createDefault();

        // TODO(2020/4/27): add a variant of the GET request that ignores SSL certificates.

        // Create the HTTP GET request.
        HttpGet get = new HttpGet("http://www.baidu.com");

        // Declared outside the try so the finally block can release it.
        CloseableHttpResponse response = null;
        try {
            // Execute the request exactly once. Executing it a second time and overwriting
            // the reference would leave the first response (and its connection) unclosed.
//            response = httpclient.execute(proxy, get);
            response = httpclient.execute(get);

            // Only handle the body when the status code is 200.
            if (response.getStatusLine().getStatusCode() == 200) {
                // Read the response body as a UTF-8 string.
                String content = EntityUtils.toString(response.getEntity(), "UTF-8");

                System.out.println("<<" + content + ">>");
                System.out.println("内容长度:" + content.length());

//                Header[] cookie =  response.getHeaders("Set-Cookie");
            }
        } finally {
            try {
                if (response != null) {
                    response.close();
                }
            } finally {
                // Comparable to closing the browser; runs even if closing the response throws.
                httpclient.close();
            }
        }
    }
}

GET带参请求

package testDemo;

import java.io.File;
import java.net.URI;

import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

/**
 * Example of an HTTP GET request that carries query parameters.
 *
 * <p>There are two ways to attach parameters:
 * <ol>
 *   <li>append them directly to the URL, e.g. {@code ?wd=java};</li>
 *   <li>set them through the URI builder, e.g. {@code setParameter("wd", "java")}.</li>
 * </ol>
 * This class uses the second form.
 */
public class DoGETParam {

    /**
     * Sends {@code GET http://www.baidu.com/s?wd=java} and, on a 200 status, prints the
     * body length followed by the body.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if building the URI, executing the request, reading the body,
     *                   or closing a resource fails
     */
    public static void main(String[] args) throws Exception {
        CloseableHttpClient client = HttpClients.createDefault();

        // Assemble the target URI step by step: base address first, then the query parameter.
        URIBuilder uriBuilder = new URIBuilder("http://www.baidu.com/s");
        uriBuilder.setParameter("wd", "java");
        URI target = uriBuilder.build();

        HttpGet request = new HttpGet(target);
        CloseableHttpResponse reply = null;
        try {
            reply = client.execute(request);

            // Anything other than 200 is ignored; the finally block still releases resources.
            if (reply.getStatusLine().getStatusCode() != 200) {
                return;
            }

            String body = EntityUtils.toString(reply.getEntity(), "UTF-8");
            System.out.println("内容长度:" + body.length());
            System.out.println("内容<<:" + body);
        } finally {
            if (reply != null) {
                reply.close();
            }
            client.close();
        }
    }
}

POST无参请求

package testDemo;

import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

/**
 * Example of an HTTP POST request without a body, using Apache HttpClient.
 *
 * <p>Posts to {@code http://www.oschina.net/} with a browser-like {@code User-Agent}
 * header and prints the response body; the status code is printed first when it is not 200,
 * and the body length is printed last when it is 200.
 */
public class DoPOST {

    /** User-Agent value sent so that the request looks like it comes from a browser. */
    private static final String BROWSER_USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36";

    /**
     * Sends the POST request and prints the outcome to standard output.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if executing the request, reading the body, or closing a resource fails
     */
    public static void main(String[] args) throws Exception {
        CloseableHttpClient client = HttpClients.createDefault();

        HttpPost request = new HttpPost("http://www.oschina.net/");
        request.setHeader("User-Agent", BROWSER_USER_AGENT);

        CloseableHttpResponse reply = null;
        try {
            reply = client.execute(request);

            int status = reply.getStatusLine().getStatusCode();
            boolean ok = status == 200;

            // For a non-200 reply the status code is printed before the body is read.
            if (!ok) {
                System.out.println(status);
            }

            // Both outcomes print the body to standard output.
            String body = EntityUtils.toString(reply.getEntity(), "UTF-8");
            System.out.println(">>" + body);

            // Only a 200 reply additionally reports the body length.
            if (ok) {
                System.out.println("内容长度:" + body.length());
            }
        } finally {
            if (reply != null) {
                reply.close();
            }
            client.close();
        }
    }
}

POST带参请求

package testDemo;

import java.io.File;
import java.util.ArrayList;
import java.util.List;

import org.apache.http.NameValuePair;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;

/**
 * Example of an HTTP POST request that carries form parameters.
 *
 * <p>Posts {@code lang=cn} and {@code pageNum=1} as a URL-encoded form to
 * {@code http://www.bcia.com.cn/bcia/FAQ/search} and prints the response body and its
 * length on a 200 status, or the status code otherwise.
 *
 * @author Mengtao
 */
public class DoPOSTParam {

    /**
     * Sends the form POST request and prints the outcome to standard output.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if encoding the form, executing the request, reading the body,
     *                   or closing a resource fails
     */
    public static void main(String[] args) throws Exception {
        // Create the HttpClient instance.
        CloseableHttpClient httpclient = HttpClients.createDefault();
        // Create the HTTP POST request.
        HttpPost httpPost = new HttpPost("http://www.bcia.com.cn/bcia/FAQ/search");
        // Set the two POST parameters.
        List<NameValuePair> parameters = new ArrayList<NameValuePair>();
        parameters.add(new BasicNameValuePair("lang", "cn"));
        parameters.add(new BasicNameValuePair("pageNum", "1"));
        // Build a form-style (application/x-www-form-urlencoded) entity.
        UrlEncodedFormEntity formEntity = new UrlEncodedFormEntity(parameters);
        // Attach the entity to the POST request.
        httpPost.setEntity(formEntity);
        // Headers that make the request look like it comes from a browser.
        httpPost.setHeader("Referer", "http://www.bcia.com.cn/cjwt.html");
        httpPost.setHeader("Host", "www.bcia.com.cn");
        httpPost.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36");

        CloseableHttpResponse response = null;
        try {
            // Execute the request.
            response = httpclient.execute(httpPost);
            // Print the body on 200, otherwise print the status code.
            if (response.getStatusLine().getStatusCode() == 200) {
                String content = EntityUtils.toString(response.getEntity(), "UTF-8");
                System.out.println("内容" + content);
                System.out.println("内容长度:" + content.length());
            } else {
                System.out.println("内容111" + response.getStatusLine().getStatusCode());
            }
        } finally {
            try {
                if (response != null) {
                    response.close();
                }
            } finally {
                // Closed inside finally so the client is released even when the request
                // or the body read throws, or when closing the response fails.
                httpclient.close();
            }
        }
    }
}

猜你喜欢

转载自blog.csdn.net/qq_41369057/article/details/131222505