1 package httpClient.client;
2
3 import java.io.File;
4 import java.io.IOException;
5 import java.io.InputStream;
6 import java.util.UUID;
7
8 import org.apache.commons.io.FileUtils;
9 import org.apache.http.HttpEntity;
10 import org.apache.http.client.ClientProtocolException;
11 import org.apache.http.client.methods.CloseableHttpResponse;
12 import org.apache.http.client.methods.HttpGet;
13 import org.apache.http.impl.client.CloseableHttpClient;
14 import org.apache.http.impl.client.HttpClients;
15 import org.apache.http.util.EntityUtils;
16 import org.jsoup.Jsoup;
17 import org.jsoup.nodes.Document;
18 import org.jsoup.nodes.Element;
19 import org.jsoup.select.Elements;
20
21 public class HttpClinet {
22
23 public static void main(String[] args) throws ClientProtocolException, IOException {
24 // 图片路径
25 String url = "https://www.mzitu.com/";
26 // 创建httpClient实例
27 CloseableHttpClient httpClient = HttpClients.createDefault();
28 HttpClinet t = new HttpClinet();
29 HttpEntity httpEntity = t.getEntity(httpClient, url);
30 String html = EntityUtils.toString(httpEntity, "UTF-8");
31 Document document = Jsoup.parse(html);
32 // 像js一样,通过标签获取title
33 // System.out.println(document.getElementsByTag("title").first());
34 // 像js一样,通过id 获取文章列表元素对象
35 Element postList = document.getElementById("pins");
36 // 像js一样,通过class 获取列表下的所有博客
37 Elements postItems = postList.select("li a");
38 // 循环处理每篇博客
39 String s = "0";
40 for (Element postItem : postItems) {
41 String urls = postItem.attr("href").trim();
42 if (!s.equals(urls)) {
43 s = urls;
44 HttpEntity httpEntitys = t.getEntity(httpClient, urls);
45 String htmls = EntityUtils.toString(httpEntitys, "UTF-8");
46 Document documents = Jsoup.parse(htmls);
47 String postLists = documents.getElementsByClass("main-image").first().select("p a img").attr("src");
48 if (postLists != null) {
49 System.out.println(postLists);
50 t.save(postLists, httpClient);
51 }
52 }
53 }
54 t.close(httpClient);
55 }
56
57 public void save(String url, CloseableHttpClient httpClient) throws ClientProtocolException, IOException {
58 String fileName = url.substring(url.lastIndexOf("."), url.length());
59 HttpEntity entity = this.getEntity(httpClient, url); // 获取返回实体
60 if (entity != null) {
61 System.out.println("Content-Type:" + entity.getContentType().getValue());
62 InputStream inputStream = entity.getContent();
63 // 文件复制,common io 包下,需要 引入依赖
64 FileUtils.copyToFile(inputStream, new File(UUID.randomUUID() + fileName));
65 }
66 }
67
68 public void close(CloseableHttpClient httpClient) throws IOException {
69 if (httpClient != null) {
70 httpClient.close();
71 }
72 }
73
74 public HttpEntity getEntity(CloseableHttpClient httpClient, String url) throws ClientProtocolException, IOException {
75 HttpGet httpGet = new HttpGet(url);
76 httpGet.setHeader("If-None-Match", "W/\"5cc2cd8f-2c58");
77 httpGet.setHeader("Referer", "http://www.mzitu.com/all/");
78 httpGet.setHeader("User-Agent",
79 "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36");
80 CloseableHttpResponse response = httpClient.execute(httpGet);
81 return response.getEntity();
82 }
83 }
<!-- Maven build descriptor for the httpClient:client module (packaged as a jar). -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- Project coordinates. -->
<groupId>httpClient</groupId>
<artifactId>client</artifactId>
<version>0.0.1-SNAPSHOT</version>
<packaging>jar</packaging>
<name>client</name>
<url>http://maven.apache.org</url>
<properties>
<!-- Source files are read as UTF-8. -->
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<!-- Unit testing; only on the test classpath. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>3.8.1</version>
<scope>test</scope>
</dependency>
<!-- Apache HttpClient: provides the org.apache.http.* classes used to issue GET requests. -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.8</version>
</dependency>
<!-- jsoup: provides the org.jsoup.* classes used to parse HTML and select elements. -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.12.1</version>
</dependency>
<!-- Commons IO: provides org.apache.commons.io.FileUtils, used to write streams to files. -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.5</version>
</dependency>
</dependencies>
</project>