jsoup 爬取电影

 
package com.spider;
import com.mysql.jdbc.Connection;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.net.URL;
import java.sql.*;
import java.util.ArrayList;
/**
 * Scrapes one Douban movie-list page with jsoup and inserts each movie it finds
 * into the MySQL table {@code move}.
 *
 * <p>For every list entry the title, detail URL, rating text and the fields parsed
 * from the entry's abstract (director, cast, genre, country/region, year) are
 * written as one eight-column row.
 *
 * <p>Email: [email protected]<br>
 * Date: 2018/11/29
 *
 * @author zhaoxin
 */
public class DB {
    /** Browser-like User-Agent sent with every HTTP request issued by this class. */
    private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36";

    /** Upper bound on the number of list entries processed per run. */
    private static final int MAX_ROWS = 20;

    /**
     * Characters skipped from the start of a two-character label to reach its value:
     * the label itself plus a two-character separator (presumably ": " -- confirm
     * against the live page markup).
     */
    private static final int LABEL_SKIP = 4;

    /**
     * Downloads the list page, extracts the movies and stores them one row at a time.
     *
     * @param args unused
     * @throws Exception if the page cannot be fetched or a database operation fails
     */
    public static void main(String[] args) throws Exception {
        String listUrl = "https://www.douban.com/doulist/3907668/?tdsourcetag=s_pctim_aiomsg&qq-pf-to=pcqq.c2c";

        // Single request, with the same User-Agent that sendGet uses and a 2000 ms timeout.
        Document document = Jsoup.connect(listUrl).userAgent(USER_AGENT).timeout(2000).get();

        // Title links carry both the movie name (text) and its detail URL (href).
        Elements titleLinks = document.select("div[class=title] a");
        Elements abstracts = document.select("div[class=abstract]");
        Elements ratings = document.select("span[class=rating_nums]");

        if (!abstracts.isEmpty()) {
            System.out.println(abstracts.get(0).text());
        }

        // The three selections are matched up by position, so only indexes present in
        // all of them can be used.
        // NOTE(review): an entry that lacks a rating or an abstract element would shift
        // the later positions; scoping the selectors per list entry would avoid that.
        int rows = Math.min(MAX_ROWS,
                Math.min(titleLinks.size(), Math.min(abstracts.size(), ratings.size())));

        for (int i = 0; i < rows; i++) {
            String[] details = parseAbstract(abstracts.get(i).text());
            if (details == null) {
                // Skip the whole row so a name is never paired with another movie's details.
                continue;
            }
            connect(titleLinks.get(i).text(),
                    titleLinks.get(i).attr("href"),
                    ratings.get(i).text(),
                    details[0], details[1], details[2], details[3], details[4]);
        }
    }

    /**
     * Splits an abstract text into director, cast, genre, country/region and year.
     *
     * @param text the abstract text of one list entry
     * @return the five trimmed values in that order, or {@code null} when a label is
     *         missing or the labels do not appear in the expected order
     */
    private static String[] parseAbstract(String text) {
        int director = text.indexOf("导演");
        int cast = text.indexOf("主演");
        int genre = text.indexOf("类型");
        int countryLabel = text.indexOf("制片国家");
        int countryValue = text.indexOf("地区");
        int year = text.indexOf("年份");
        if (director < 0 || cast < 0 || genre < 0
                || countryLabel < 0 || countryValue < 0 || year < 0) {
            return null;
        }

        String[] details = {
            slice(text, director + LABEL_SKIP, cast),
            slice(text, cast + LABEL_SKIP, genre),
            // The genre value ends where the country label starts, not at "地区".
            slice(text, genre + LABEL_SKIP, countryLabel),
            slice(text, countryValue + LABEL_SKIP, year),
            slice(text, year + LABEL_SKIP, text.length())
        };
        for (String value : details) {
            if (value == null) {
                return null;
            }
        }
        return details;
    }

    /**
     * Returns the trimmed substring {@code [from, to)}, or {@code null} when the range
     * is not valid for {@code text}.
     */
    private static String slice(String text, int from, int to) {
        if (from > to || to > text.length()) {
            return null;
        }
        return text.substring(from, to).trim();
    }

    /**
     * Sends a GET request with a browser-like User-Agent and returns the raw response.
     *
     * <p>The caller is responsible for closing the returned response. The HTTP client
     * created here is not closed by this method.
     *
     * @param url the address to request
     * @return the response of the executed request
     * @throws IOException if the request fails
     */
    public static CloseableHttpResponse sendGet(String url) throws IOException {
        CloseableHttpClient httpClient = HttpClients.createDefault();
        HttpGet httpGet = new HttpGet(url);
        httpGet.setHeader("User-Agent", USER_AGENT);
        return httpClient.execute(httpGet);
    }

    /**
     * Opens a database connection and inserts one row into the table {@code move}.
     *
     * <p>The eight arguments are bound, in order, to the eight placeholders of the
     * insert statement. {@link #main} passes name, URL, rating, director, cast, genre,
     * country/region and year.
     *
     * @throws Exception if the JDBC driver cannot be loaded or the insert fails
     */
    public static void connect(String s1,String s2,String s3,String s4,String s5,String s6,String s7,String s8)throws Exception{
        String url="jdbc:mysql://localhost:3306/test.db?useSSL=true";
        String user="root";
        String pwd ="root";

        // Register the JDBC driver.
        Class.forName("com.mysql.jdbc.Driver");

        System.out.println("连接数据库...");
        // java.sql.Connection is spelled out because the file imports
        // com.mysql.jdbc.Connection under the simple name. try-with-resources closes the
        // statement and the connection even when the insert throws.
        try (java.sql.Connection conn = DriverManager.getConnection(url,user,pwd);
             PreparedStatement stmt = conn.prepareStatement("insert into move values(?,?,?,?,?,?,?,?)")) {
            String[] values = {s1, s2, s3, s4, s5, s6, s7, s8};
            for (int i = 0; i < values.length; i++) {
                stmt.setString(i + 1, values[i]);
            }
            System.out.println("连接成");
            stmt.executeUpdate();
        }
    }
}

猜你喜欢

转载自blog.csdn.net/qq_37668945/article/details/84642559