一、数据源介绍:https://www.jianshu.com/p/c54e25349b77
1、API(基础地址 + 各栏目的相对路径)
网易:
https://3g.163.com
新闻:/touch/reconstruct/article/list/BBM54PGAwangning/0-10.html
娱乐:/touch/reconstruct/article/list/BA10TA81wangning/0-10.html
体育:/touch/reconstruct/article/list/BA8E6OEOwangning/0-10.html
财经:/touch/reconstruct/article/list/BA8EE5GMwangning/0-10.html
军事:/touch/reconstruct/article/list/BAI67OGGwangning/0-10.html
科技:/touch/reconstruct/article/list/BA8D4A3Rwangning/0-10.html
手机:/touch/reconstruct/article/list/BAI6I0O5wangning/0-10.html
数码:/touch/reconstruct/article/list/BAI6JOD9wangning/0-10.html
时尚:/touch/reconstruct/article/list/BA8F6ICNwangning/0-10.html
游戏:/touch/reconstruct/article/list/BAI6RHDKwangning/0-10.html
教育:/touch/reconstruct/article/list/BA8FF5PRwangning/0-10.html
健康:/touch/reconstruct/article/list/BDC4QSV3wangning/0-10.html
旅游:/touch/reconstruct/article/list/BEO4GINLwangning/0-10.html
视频:/touch/nc/api/video/recommend/Video_Recom/0-10.do?callback=videoList
2、数据结构(各栏目接口与上文相同;接口返回的单条新闻数据示例见列表之后的 JSON):
新闻:/touch/reconstruct/article/list/BBM54PGAwangning/0-10.html
娱乐:/touch/reconstruct/article/list/BA10TA81wangning/0-10.html
体育:/touch/reconstruct/article/list/BA8E6OEOwangning/0-10.html
财经:/touch/reconstruct/article/list/BA8EE5GMwangning/0-10.html
军事:/touch/reconstruct/article/list/BAI67OGGwangning/0-10.html
科技:/touch/reconstruct/article/list/BA8D4A3Rwangning/0-10.html
手机:/touch/reconstruct/article/list/BAI6I0O5wangning/0-10.html
数码:/touch/reconstruct/article/list/BAI6JOD9wangning/0-10.html
时尚:/touch/reconstruct/article/list/BA8F6ICNwangning/0-10.html
游戏:/touch/reconstruct/article/list/BAI6RHDKwangning/0-10.html
教育:/touch/reconstruct/article/list/BA8FF5PRwangning/0-10.html
健康:/touch/reconstruct/article/list/BDC4QSV3wangning/0-10.html
旅游:/touch/reconstruct/article/list/BEO4GINLwangning/0-10.html
视频:/touch/nc/api/video/recommend/Video_Recom/0-10.do?callback=videoList
{ "liveInfo": null, "docid": "F9R8L9K70001899O", "source": "国家卫健委", "title": "昨日全国新增确诊42例 其中境外输入38例本土4例", "priority": 100, "hasImg": 1, "url": "https://3g.163.com/news/20/0410/07/F9R8L9K70001899O.html", "skipURL": "http://3g.163.com/ntes/special/00340EPA/wapSpecialModule.html?sid=S1578049488158", "specialID": "S1578049488158", "commentCount": 4117, "imgsrc3gtype": "1", "stitle": "S1578049488158", "digest": "4月9日0—24时,31个省(自治区、直辖市)和新疆生产建设", "skipType": "special", "imgsrc": "http://cms-bucket.ws.126.net/2020/0410/7c943a79p00q8jnz6009lc000s600e3c.png", "ptime": "2020-04-10 07:26:48" }
二、实现代码
1、封装发送 HTTP GET 请求、并以字符串形式返回响应内容(JSON 或 HTML 文本)的工具类
package com.me.utils;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;

/**
 * Minimal HTTP GET helper that returns the response body as a string.
 */
public class HttpUtil {

    /** Timeout, in milliseconds, applied to both connecting and reading. */
    private static final int TIMEOUT_MILLIS = 5000;

    /** Prefix of the Content-Type parameter that names the body encoding. */
    private static final String CHARSET_PARAM = "charset=";

    /**
     * Sends an HTTP GET request and returns the response body.
     *
     * <p>Lines of the body are concatenated without their line terminators.
     * The body is decoded with the charset announced in the response's
     * Content-Type header, falling back to UTF-8, instead of the platform
     * default charset.
     *
     * @param setUrl absolute URL to fetch
     * @return the response body, or an empty string when the URL is malformed,
     *         is not an HTTP(S) URL, the status is not 200, or an I/O error occurs
     */
    public static String setUrl(String setUrl) {
        HttpURLConnection conn = null;
        try {
            URL url = new URL(setUrl);
            URLConnection raw = url.openConnection();
            if (!(raw instanceof HttpURLConnection)) {
                // e.g. file: or jar: URLs -- a blind cast would throw ClassCastException.
                return "";
            }
            conn = (HttpURLConnection) raw;
            conn.setConnectTimeout(TIMEOUT_MILLIS);
            // Without a read timeout a stalled server blocks the caller forever.
            conn.setReadTimeout(TIMEOUT_MILLIS);
            conn.setRequestMethod("GET");
            if (conn.getResponseCode() != HttpURLConnection.HTTP_OK) {
                return "";
            }
            Charset charset = charsetOf(conn.getContentType());
            try (InputStream in = conn.getInputStream();
                 BufferedReader reader = new BufferedReader(new InputStreamReader(in, charset))) {
                StringBuilder body = new StringBuilder();
                String line;
                while ((line = reader.readLine()) != null) {
                    body.append(line);
                }
                return body.toString();
            }
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
        }
        return "";
    }

    /**
     * Picks the charset named in a Content-Type header value, or UTF-8 when
     * the header is missing, has no charset parameter, or names an unknown charset.
     */
    private static Charset charsetOf(String contentType) {
        if (contentType != null) {
            for (String part : contentType.split(";")) {
                String param = part.trim();
                if (param.regionMatches(true, 0, CHARSET_PARAM, 0, CHARSET_PARAM.length())) {
                    String name = param.substring(CHARSET_PARAM.length()).replace("\"", "").trim();
                    try {
                        return Charset.forName(name);
                    } catch (IllegalArgumentException ignored) {
                        // Illegal or unsupported charset name: fall through to UTF-8.
                    }
                }
            }
        }
        return StandardCharsets.UTF_8;
    }
}
2、封装使用jsoup解析新闻内容的工具类
package com.me.utils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.select.Elements; public class JsoupNewsUtil { /** * * @param data * @return News_dg */ public static String zw(String data){ Document document = Jsoup.parse(data); //获取新闻的内容 Elements content = document.getElementsByClass("content"); return content.text().trim(); } }
3、新闻的实体类
package com.me.domain; public class News { private int id; private int priority; private int commentCount; private String source; private String title; private String url; private String digest; private String imgsrc; private String ptime; private String zw; private String type; public String getType() { return type; } public void setType(String type) { this.type = type; } @Override public String toString() { return "News{" + "id=" + id + ", priority=" + priority + ", commentCount=" + commentCount + ", source='" + source + '\'' + ", title='" + title + '\'' + ", url='" + url + '\'' + ", digest='" + digest + '\'' + ", imgsrc='" + imgsrc + '\'' + ", ptime='" + ptime + '\'' + ", zw='" + zw + '\'' + '}'; } public String getZw() { return zw; } public void setZw(String zw) { this.zw = zw; } public int getId() { return id; } public void setId(int id) { this.id = id; } public int getPriority() { return priority; } public void setPriority(int priority) { this.priority = priority; } public int getCommentCount() { return commentCount; } public void setCommentCount(int commentCount) { this.commentCount = commentCount; } public String getSource() { return source; } public void setSource(String source) { this.source = source; } public String getTitle() { return title; } public void setTitle(String title) { this.title = title; } public String getUrl() { return url; } public void setUrl(String url) { this.url = url; } public String getDigest() { return digest; } public void setDigest(String digest) { this.digest = digest; } public String getImgsrc() { return imgsrc; } public void setImgsrc(String imgsrc) { this.imgsrc = imgsrc; } public String getPtime() { return ptime; } public void setPtime(String ptime) { this.ptime = ptime; } }
package com.me.domain;

import java.util.List;

/**
 * Holder for a news-list response: one list of {@link News} per channel.
 *
 * <p>Each field is named after the channel id that appears in the request
 * path ({@code /touch/reconstruct/article/list/<channel id>/...}), so the
 * field names must not be changed.
 */
public class NewsListData {

    /** News (headlines) channel. */
    private List<News> BBM54PGAwangning;
    /** Entertainment channel. */
    private List<News> BA10TA81wangning;
    /** Sports channel. */
    private List<News> BA8E6OEOwangning;
    /** Finance channel. */
    private List<News> BA8EE5GMwangning;
    /** Military channel. */
    private List<News> BAI67OGGwangning;
    /** Technology channel. */
    private List<News> BA8D4A3Rwangning;
    /** Mobile-phone channel. */
    private List<News> BAI6I0O5wangning;
    /** Digital-gadgets channel. */
    private List<News> BAI6JOD9wangning;
    /** Fashion channel. */
    private List<News> BA8F6ICNwangning;
    /** Games channel. */
    private List<News> BAI6RHDKwangning;
    /** Education channel. */
    private List<News> BA8FF5PRwangning;
    /** Health channel. */
    private List<News> BDC4QSV3wangning;
    /** Travel channel. */
    private List<News> BEO4GINLwangning;

    // ---- accessors, in the same order as the fields above ----

    public List<News> getBBM54PGAwangning() {
        return BBM54PGAwangning;
    }

    public void setBBM54PGAwangning(List<News> BBM54PGAwangning) {
        this.BBM54PGAwangning = BBM54PGAwangning;
    }

    public List<News> getBA10TA81wangning() {
        return BA10TA81wangning;
    }

    public void setBA10TA81wangning(List<News> BA10TA81wangning) {
        this.BA10TA81wangning = BA10TA81wangning;
    }

    public List<News> getBA8E6OEOwangning() {
        return BA8E6OEOwangning;
    }

    public void setBA8E6OEOwangning(List<News> BA8E6OEOwangning) {
        this.BA8E6OEOwangning = BA8E6OEOwangning;
    }

    public List<News> getBA8EE5GMwangning() {
        return BA8EE5GMwangning;
    }

    public void setBA8EE5GMwangning(List<News> BA8EE5GMwangning) {
        this.BA8EE5GMwangning = BA8EE5GMwangning;
    }

    public List<News> getBAI67OGGwangning() {
        return BAI67OGGwangning;
    }

    public void setBAI67OGGwangning(List<News> BAI67OGGwangning) {
        this.BAI67OGGwangning = BAI67OGGwangning;
    }

    public List<News> getBA8D4A3Rwangning() {
        return BA8D4A3Rwangning;
    }

    public void setBA8D4A3Rwangning(List<News> BA8D4A3Rwangning) {
        this.BA8D4A3Rwangning = BA8D4A3Rwangning;
    }

    public List<News> getBAI6I0O5wangning() {
        return BAI6I0O5wangning;
    }

    public void setBAI6I0O5wangning(List<News> BAI6I0O5wangning) {
        this.BAI6I0O5wangning = BAI6I0O5wangning;
    }

    public List<News> getBAI6JOD9wangning() {
        return BAI6JOD9wangning;
    }

    public void setBAI6JOD9wangning(List<News> BAI6JOD9wangning) {
        this.BAI6JOD9wangning = BAI6JOD9wangning;
    }

    public List<News> getBA8F6ICNwangning() {
        return BA8F6ICNwangning;
    }

    public void setBA8F6ICNwangning(List<News> BA8F6ICNwangning) {
        this.BA8F6ICNwangning = BA8F6ICNwangning;
    }

    public List<News> getBAI6RHDKwangning() {
        return BAI6RHDKwangning;
    }

    public void setBAI6RHDKwangning(List<News> BAI6RHDKwangning) {
        this.BAI6RHDKwangning = BAI6RHDKwangning;
    }

    public List<News> getBA8FF5PRwangning() {
        return BA8FF5PRwangning;
    }

    public void setBA8FF5PRwangning(List<News> BA8FF5PRwangning) {
        this.BA8FF5PRwangning = BA8FF5PRwangning;
    }

    public List<News> getBDC4QSV3wangning() {
        return BDC4QSV3wangning;
    }

    public void setBDC4QSV3wangning(List<News> BDC4QSV3wangning) {
        this.BDC4QSV3wangning = BDC4QSV3wangning;
    }

    public List<News> getBEO4GINLwangning() {
        return BEO4GINLwangning;
    }

    public void setBEO4GINLwangning(List<News> BEO4GINLwangning) {
        this.BEO4GINLwangning = BEO4GINLwangning;
    }
}
4、dao层
package com.me.dao;

import com.me.domain.News;
import com.me.utils.DBUtils;
import org.apache.commons.dbutils.QueryRunner;
import org.apache.commons.dbutils.handlers.BeanListHandler;

import java.sql.SQLException;
import java.util.List;

/**
 * Data access for the {@code newslist} table.
 */
public class NewsDao {

    /**
     * Deletes every row of {@code newslist}.
     *
     * @return {@code true} if at least one row was deleted
     * @throws SQLException if the statement fails
     */
    public boolean deleteAll() throws SQLException {
        QueryRunner qr = new QueryRunner(DBUtils.getDataSource());
        String sql = "delete from newslist ";
        return qr.update(sql) > 0;
    }

    /**
     * Stores the article body text of one news row.
     *
     * @param zw article body text
     * @param id id of the row to update
     * @return {@code true} if a row was updated
     * @throws SQLException if the statement fails
     */
    public boolean zw(String zw, int id) throws SQLException {
        QueryRunner qr = new QueryRunner(DBUtils.getDataSource());
        String sql = "update newslist set zw = ? where id=? ";
        return qr.update(sql, zw, id) > 0;
    }

    /**
     * Lists the news rows that have a usable url.
     *
     * <p>The previous filter used {@code url != null}, which is never true in
     * SQL, and chained the conditions with {@code or}, which made the filter
     * accept every row with a non-null url. The conditions are now combined
     * with {@code and}: the url must be non-null, non-empty and different from
     * the hard-coded photoset id.
     *
     * @return matching rows mapped to {@link News}
     * @throws SQLException if the query fails
     */
    public List<News> newsList() throws SQLException {
        QueryRunner qr = new QueryRunner(DBUtils.getDataSource());
        String sql = "select * from newslist"
                + " where url is not null and url != '17KK0006|2145432' and url != ?";
        return qr.query(sql, new BeanListHandler<News>(News.class), "");
    }

    /**
     * Returns up to five news rows whose title contains the given keyword.
     *
     * <p>The LIKE pattern is bound as {@code %keyword%}. It used to be bound
     * with literal single quotes around it, so it only matched titles that
     * themselves started and ended with a quote character. {@code %} and
     * {@code _} inside the keyword are not escaped and act as LIKE wildcards.
     *
     * @param world keyword to look for; {@code null} is treated as empty
     * @return at most five matching rows mapped to {@link News}
     * @throws SQLException if the query fails
     */
    public List<News> search(String world) throws SQLException {
        QueryRunner qr = new QueryRunner(DBUtils.getDataSource());
        String sql = "select * from newslist"
                + " where url is not null and url != ? and title like ? limit 0 , 5";
        String keyword = (world == null) ? "" : world;
        return qr.query(sql, new BeanListHandler<News>(News.class), "", "%" + keyword + "%");
    }

    /**
     * Inserts one news item.
     *
     * @param news item to insert; {@code id} and {@code zw} are not written
     * @return {@code true} if a row was inserted
     * @throws SQLException if the statement fails
     */
    public boolean add(News news) throws SQLException {
        QueryRunner qr = new QueryRunner(DBUtils.getDataSource());
        String sql = "insert into newslist (source,title,priority,url,commentCount,digest,imgsrc,ptime,type) "
                + "values(?,?,?,?,?,?,?,?,?)";
        int inserted = qr.update(sql,
                news.getSource(), news.getTitle(), news.getPriority(), news.getUrl(),
                news.getCommentCount(), news.getDigest(), news.getImgsrc(), news.getPtime(),
                news.getType());
        return inserted > 0;
    }
}
5、service层
package com.me.service;

import com.google.gson.Gson;
import com.me.dao.NewsDao;
import com.me.domain.News;
import com.me.domain.NewsListData;
import com.me.utils.HttpUtil;
import com.me.utils.JsoupNewsUtil;

import java.sql.SQLException;
import java.util.List;

/**
 * Imports the news lists of all channels into the database and then fills in
 * the article text of every stored item.
 */
public class NewsList {

    /**
     * Number of leading characters cut off the list response before it is
     * parsed as JSON.
     * NOTE(review): presumably the length of a JSONP callback prefix -- confirm
     * against a live response.
     */
    private static final int RESPONSE_PREFIX_LENGTH = 9;

    /** Number of trailing characters cut off the list response. */
    private static final int RESPONSE_SUFFIX_LENGTH = 1;

    NewsDao dao = new NewsDao();

    private final Gson gson = new Gson();

    /**
     * Clears the table, imports the first 20 items of every channel and then
     * downloads the article text for the stored items.
     */
    public static void main(String[] args) throws SQLException {
        NewsList newsList = new NewsList();
        newsList.deleteAll();
        newsList.addXW("https://3g.163.com/touch/reconstruct/article/list/BBM54PGAwangning/0-20.html");
        newsList.addYL("https://3g.163.com/touch/reconstruct/article/list/BA10TA81wangning/0-20.html");
        newsList.addTY("https://3g.163.com/touch/reconstruct/article/list/BA8E6OEOwangning/0-20.html");
        newsList.addCJ("https://3g.163.com/touch/reconstruct/article/list/BA8EE5GMwangning/0-20.html");
        newsList.addJS("https://3g.163.com/touch/reconstruct/article/list/BAI67OGGwangning/0-20.html");
        newsList.addKJ("https://3g.163.com/touch/reconstruct/article/list/BA8D4A3Rwangning/0-20.html");
        newsList.addSJ("https://3g.163.com/touch/reconstruct/article/list/BAI6I0O5wangning/0-20.html");
        newsList.addSM("https://3g.163.com/touch/reconstruct/article/list/BAI6JOD9wangning/0-20.html");
        newsList.addSS("https://3g.163.com/touch/reconstruct/article/list/BA8F6ICNwangning/0-20.html");
        newsList.addYX("https://3g.163.com/touch/reconstruct/article/list/BAI6RHDKwangning/0-20.html");
        newsList.addJY("https://3g.163.com/touch/reconstruct/article/list/BA8FF5PRwangning/0-20.html");
        newsList.addJK("https://3g.163.com/touch/reconstruct/article/list/BDC4QSV3wangning/0-20.html");
        newsList.addLY("https://3g.163.com/touch/reconstruct/article/list/BEO4GINLwangning/0-20.html");
        newsList.zw();
    }

    /**
     * Imports the news (headlines) channel.
     *
     * @param url list URL of the channel
     * @throws SQLException if an insert fails
     */
    public void addXW(String url) throws SQLException {
        saveAll(fetchList(url).getBBM54PGAwangning(), "新闻");
    }

    /**
     * Imports the entertainment channel.
     *
     * @param url list URL of the channel
     * @throws SQLException if an insert fails
     */
    public void addYL(String url) throws SQLException {
        saveAll(fetchList(url).getBA10TA81wangning(), "娱乐");
    }

    /**
     * Imports the sports channel.
     *
     * @param url list URL of the channel
     * @throws SQLException if an insert fails
     */
    public void addTY(String url) throws SQLException {
        saveAll(fetchList(url).getBA8E6OEOwangning(), "体育");
    }

    /**
     * Imports the finance channel.
     *
     * @param url list URL of the channel
     * @throws SQLException if an insert fails
     */
    public void addCJ(String url) throws SQLException {
        saveAll(fetchList(url).getBA8EE5GMwangning(), "财经");
    }

    /**
     * Imports the military channel.
     *
     * @param url list URL of the channel
     * @throws SQLException if an insert fails
     */
    public void addJS(String url) throws SQLException {
        saveAll(fetchList(url).getBAI67OGGwangning(), "军事");
    }

    /**
     * Imports the technology channel.
     *
     * @param url list URL of the channel
     * @throws SQLException if an insert fails
     */
    public void addKJ(String url) throws SQLException {
        saveAll(fetchList(url).getBA8D4A3Rwangning(), "科技");
    }

    /**
     * Imports the mobile-phone channel.
     *
     * @param url list URL of the channel
     * @throws SQLException if an insert fails
     */
    public void addSJ(String url) throws SQLException {
        saveAll(fetchList(url).getBAI6I0O5wangning(), "手机");
    }

    /**
     * Imports the digital-gadgets channel.
     *
     * @param url list URL of the channel
     * @throws SQLException if an insert fails
     */
    public void addSM(String url) throws SQLException {
        saveAll(fetchList(url).getBAI6JOD9wangning(), "数码");
    }

    /**
     * Imports the fashion channel.
     *
     * @param url list URL of the channel
     * @throws SQLException if an insert fails
     */
    public void addSS(String url) throws SQLException {
        saveAll(fetchList(url).getBA8F6ICNwangning(), "时尚");
    }

    /**
     * Imports the games channel.
     *
     * @param url list URL of the channel
     * @throws SQLException if an insert fails
     */
    public void addYX(String url) throws SQLException {
        saveAll(fetchList(url).getBAI6RHDKwangning(), "游戏");
    }

    /**
     * Imports the education channel.
     *
     * @param url list URL of the channel
     * @throws SQLException if an insert fails
     */
    public void addJY(String url) throws SQLException {
        saveAll(fetchList(url).getBA8FF5PRwangning(), "教育");
    }

    /**
     * Imports the health channel.
     *
     * @param url list URL of the channel
     * @throws SQLException if an insert fails
     */
    public void addJK(String url) throws SQLException {
        saveAll(fetchList(url).getBDC4QSV3wangning(), "健康");
    }

    /**
     * Imports the travel channel.
     *
     * @param url list URL of the channel
     * @throws SQLException if an insert fails
     */
    public void addLY(String url) throws SQLException {
        saveAll(fetchList(url).getBEO4GINLwangning(), "旅游");
    }

    /**
     * Downloads the page of every stored news item, extracts its article text
     * and writes the text back to the item's row.
     *
     * @throws SQLException if the query or an update fails
     */
    public void zw() throws SQLException {
        List<News> stored = dao.newsList();
        for (News item : stored) {
            String page = HttpUtil.setUrl(item.getUrl());
            String text = JsoupNewsUtil.zw(page);
            dao.zw(text, item.getId());
        }
    }

    /**
     * Removes all stored news.
     *
     * @throws SQLException if the delete fails
     */
    public void deleteAll() throws SQLException {
        dao.deleteAll();
    }

    /**
     * Manual check: parses a sample photoset item and prints the resulting bean.
     */
    public void test() {
        News news = gson.fromJson("{\n" +
                " \"imgextra\": [\n" +
                " {\n" +
                " \"imgsrc\": \"http://cms-bucket.ws.126.net/2020/0410/4ef9af5aj00q8jdsh00bpc000sg00sgc.jpg\"\n" +
                " },\n" +
                " {\n" +
                " \"imgsrc\": \"http://cms-bucket.ws.126.net/2020/0410/6f7bd38bj00q8jdsh003xc000sg00dic.jpg\"\n" +
                " }\n" +
                " ],\n" +
                " \"liveInfo\": null,\n" +
                " \"docid\": \"17KK0006|2145433\",\n" +
                " \"source\": \"极客鲜疯队\",\n" +
                " \"title\": \"宅家赏美丽高原 一生中值得一看的美景\",\n" +
                " \"priority\": 150,\n" +
                " \"url\": \"17KK0006|2145433\",\n" +
                " \"skipURL\": \"http://3g.163.com/touch/photoview.html?channelid=0006&setid=2145433\",\n" +
                " \"commentCount\": 15,\n" +
                " \"imgsrc3gtype\": \"2\",\n" +
                " \"stitle\": \"17KK0006|2145433\",\n" +
                " \"digest\": \"\",\n" +
                " \"skipType\": \"photoset\",\n" +
                " \"photosetID\": \"0006|2145433\",\n" +
                " \"imgsrc\": \"http://cms-bucket.ws.126.net/2020/0410/1bd79be9j00q8jdsh008tc000sg00izc.jpg\",\n" +
                " \"ptime\": \"2020-04-10 03:51:04\",\n" +
                " \"modelmode\": \"u\"\n" +
                " }", News.class);
        System.out.println(news.toString());
    }

    /**
     * Downloads a list response and parses it.
     *
     * <p>Never returns {@code null}: when the download yields nothing usable
     * (HttpUtil returns an empty string on failure) an empty holder is
     * returned, so all of its channel lists are {@code null}. Previously a
     * failed download made {@code substring} throw.
     */
    private NewsListData fetchList(String url) {
        String data = HttpUtil.setUrl(url);
        if (data == null || data.length() < RESPONSE_PREFIX_LENGTH + RESPONSE_SUFFIX_LENGTH) {
            return new NewsListData();
        }
        String json = data.substring(RESPONSE_PREFIX_LENGTH, data.length() - RESPONSE_SUFFIX_LENGTH);
        NewsListData parsed = gson.fromJson(json, NewsListData.class);
        // Gson yields null for an empty document.
        return parsed != null ? parsed : new NewsListData();
    }

    /**
     * Tags every item with the given category label and inserts it; a missing
     * list or a null item is skipped.
     */
    private void saveAll(List<News> items, String type) throws SQLException {
        if (items == null) {
            return;
        }
        for (News item : items) {
            if (item == null) {
                continue;
            }
            item.setType(type);
            dao.add(item);
        }
    }
}