JSoup的简单使用

版权声明:Leo.All Rights Reserved. https://blog.csdn.net/qq_41113081/article/details/89598355

要解析的网页地址:http://202.194.143.19/asord/asord_hist.php?page=1

要解析的网页表格,一共86页

要解析的页面

URL后面的page参数表示要请求的页码(第几页),而不是总页数;总页数这里采取手动输入的方式写在循环里,目前共86页。
解析完成后,将每一行数据存入数据库。
数据库表

数据表对应的实体类

package com.leo;

import java.util.Date;

/**
 * Mutable entity holding one row of the book purchase-recommendation table.
 *
 * <p>All fields are nullable; the no-argument constructor leaves every field {@code null}.
 */
public class Book {

    /** Identifier of the record; stays {@code null} when the id-less constructor is used. */
    private Integer id;

    /** Title. */
    private String name;

    /** Responsible party (author). */
    private String author;

    /** Publication information. */
    private String press;

    /** Date on which the purchase was recommended. */
    private Date date;

    /** Status of the purchase recommendation. */
    private String status;

    /** Processing remarks. */
    private String remark;

    /** Creates an instance with every field unset. */
    public Book() {
    }

    /**
     * Creates an instance without an identifier; {@code id} is left {@code null}.
     *
     * @param name   title
     * @param author responsible party
     * @param press  publication information
     * @param date   recommendation date (stored by reference, not copied)
     * @param status recommendation status
     * @param remark processing remarks
     */
    public Book(String name, String author, String press, Date date, String status, String remark) {
        this(null, name, author, press, date, status, remark);
    }

    /**
     * Creates a fully populated instance.
     *
     * @param id     identifier of the record
     * @param name   title
     * @param author responsible party
     * @param press  publication information
     * @param date   recommendation date (stored by reference, not copied)
     * @param status recommendation status
     * @param remark processing remarks
     */
    public Book(Integer id, String name, String author, String press, Date date, String status, String remark) {
        this.id = id;
        this.name = name;
        this.author = author;
        this.press = press;
        this.date = date;
        this.status = status;
        this.remark = remark;
    }

    /** @return the identifier, possibly {@code null} */
    public Integer getId() {
        return this.id;
    }

    /** @param id the identifier to store */
    public void setId(Integer id) {
        this.id = id;
    }

    /** @return the title */
    public String getName() {
        return this.name;
    }

    /** @param name the title to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the responsible party */
    public String getAuthor() {
        return this.author;
    }

    /** @param author the responsible party to store */
    public void setAuthor(String author) {
        this.author = author;
    }

    /** @return the publication information */
    public String getPress() {
        return this.press;
    }

    /** @param press the publication information to store */
    public void setPress(String press) {
        this.press = press;
    }

    /** @return the recommendation date (the stored reference itself) */
    public Date getDate() {
        return this.date;
    }

    /** @param date the recommendation date to store (kept by reference) */
    public void setDate(Date date) {
        this.date = date;
    }

    /** @return the recommendation status */
    public String getStatus() {
        return this.status;
    }

    /** @param status the recommendation status to store */
    public void setStatus(String status) {
        this.status = status;
    }

    /** @return the processing remarks */
    public String getRemark() {
        return this.remark;
    }

    /** @param remark the processing remarks to store */
    public void setRemark(String remark) {
        this.remark = remark;
    }

    /**
     * Returns this very instance.
     *
     * @return {@code this}
     */
    public Book getBook() {
        return this;
    }

    /**
     * Renders all fields as {@code Book{id=..., name='...', ...}}; unset fields print as
     * {@code null}.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Book{");
        text.append("id=").append(id);
        text.append(", name='").append(name).append('\'');
        text.append(", author='").append(author).append('\'');
        text.append(", press='").append(press).append('\'');
        text.append(", date=").append(date);
        text.append(", status='").append(status).append('\'');
        text.append(", remark='").append(remark).append('\'');
        text.append('}');
        return text.toString();
    }
}
SqlSession sqlSession = DBSession.getSession();
        // NOTE(review): closing the session below assumes DBSession.getSession() hands out a
        // session owned by this caller (not a shared singleton) — confirm against DBSession.
        try {
            // Phase 1: download all result pages first. Any IOException here aborts the run
            // before a single row has been inserted.
            List<Document> documents = new ArrayList<>(88);
            for (int i = 0; i < 86; i++) {
                // The "page" request parameter is 1-based, hence i + 1.
                System.out.print("正在解析第 " + (i + 1) + " 页 : ");
                documents.add(Jsoup.connect("http://202.194.143.19/asord/asord_hist.php")
                        .data("page", String.valueOf(i + 1))
                        .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.108 Safari/537.36")
                        .timeout(3000)
                        .get());
                System.out.println("解析成功");
                // No delay is applied between consecutive requests.
            }

            // Phase 2: turn every table row into a Book and queue its insert.
            for (Document document : documents) {
                Elements table = document.getElementsByClass("table_line");
                Elements tr = table.select("tr");
                // Row 0 is skipped — presumably the table header; verify against the page.
                for (int j = 1; j < tr.size(); j++) {
                    Elements tds = tr.get(j).getElementsByClass("whitetext");
                    // Cell 0 is not mapped to any Book field; cells 1-6 fill the entity in
                    // constructor order. Date.valueOf rejects text that is not yyyy-[m]m-[d]d.
                    Book book = new Book(
                            tds.get(1).text(),
                            tds.get(2).text(),
                            tds.get(3).text(),
                            Date.valueOf(tds.get(4).text()),
                            tds.get(5).text(),
                            tds.get(6).text());
                    sqlSession.insert("insert", book);
                }
            }

            // Commit only after both phases completed without an exception.
            sqlSession.commit();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Always release the session; the original never closed it, leaking the
            // underlying connection on every path (including unchecked parse failures).
            sqlSession.close();
        }
    }

数据库
可以参考鸿洋大神的博客,设计一个可复用的解析模块:https://blog.csdn.net/lmj623565791/article/details/23272657

猜你喜欢

转载自blog.csdn.net/qq_41113081/article/details/89598355