直接上代码
/**
 * Small demo: download a web page with Apache HttpClient and query the
 * resulting HTML with jsoup CSS selectors.
 */
public class JsoupDemo {

    /**
     * Fetches the cnblogs front page, prints the HTTP status code, then prints
     * the inner HTML and the {@code href} of every link matched by the
     * post-title selector.
     *
     * @param args unused
     * @throws IOException if the request fails or the response body cannot be read
     */
    public static void main(String[] args) throws IOException {
        String url = "http://www.cnblogs.com";
        HttpGet httpGet = new HttpGet(url);
        // Send a desktop-browser User-Agent instead of the HttpClient default.
        httpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36");

        // try-with-resources closes the response first and the client second,
        // even when the request, the body read, or the parse throws.
        try (CloseableHttpClient client = HttpClients.createDefault();
             CloseableHttpResponse response = client.execute(httpGet)) {
            System.out.println("status:" + response.getStatusLine().getStatusCode());

            // Get the response entity; it may be absent for body-less responses.
            HttpEntity entity = response.getEntity();
            if (entity == null) {
                return;
            }
            String content = EntityUtils.toString(entity, "utf-8");
            Document dom = Jsoup.parse(content);

            printPostLinks(dom);

            // Selector example: every <img> whose src ends in ".png".
            // The result is intentionally unused; the call only demonstrates the syntax.
            dom.select("img[src$=.png]");
        }
    }

    /**
     * Prints the inner HTML and the href attribute of each post-title link.
     * Tag-based lookup (e.g. {@code dom.getElementsByTag("title")}) is an
     * alternative way to locate elements.
     */
    private static void printPostLinks(Document dom) {
        // Use a CSS selector to find all post titles.
        Elements links = dom.select("#post_list .post_item .post_item_body h3 a");
        for (Element link : links) {
            System.out.println(link.html());
            System.out.println(link.attr("href")); // value of the href attribute
        }
    }
}