package com.spider;
import com.mysql.jdbc.Connection;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.net.URL;
import java.sql.*;
import java.util.ArrayList;
/**
* @Author zhaoxin
* @Email [email protected]
* @Description Scrapes movie entries from a Douban list page and inserts them into a MySQL table.
* @Date 2018/11/29
**/
public class DB {
    // Douban list page that is scraped by main().
    private static final String PAGE_URL =
            "https://www.douban.com/doulist/3907668/?tdsourcetag=s_pctim_aiomsg&qq-pf-to=pcqq.c2c";
    // Timeout handed to Jsoup when it downloads the page, in milliseconds.
    private static final int FETCH_TIMEOUT_MILLIS = 2000;
    // Upper bound on the number of rows main() inserts per run.
    private static final int MAX_ROWS = 20;

    // Labels expected in an abstract block, in order: director, cast, genre, country, year.
    private static final String[] ABSTRACT_LABELS = {"导演", "主演", "类型", "制片国家", "年份"};

    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36";

    // One client shared by every sendGet() call instead of one never-closed client per call.
    private static final CloseableHttpClient HTTP_CLIENT = HttpClients.createDefault();

    // NOTE(review): connection settings and credentials are hard-coded; consider externalizing them.
    private static final String JDBC_DRIVER = "com.mysql.jdbc.Driver";
    private static final String JDBC_URL = "jdbc:mysql://localhost:3306/test.db?useSSL=true";
    private static final String JDBC_USER = "root";
    private static final String JDBC_PASSWORD = "root";
    private static final String INSERT_SQL = "insert into move values(?,?,?,?,?,?,?,?)";

    /**
     * Downloads the list page, extracts name, link, rating and the abstract attributes
     * (director, cast, genre, country, year) of each entry, and inserts up to
     * {@code MAX_ROWS} entries via {@link #connect}.
     *
     * <p>Entries whose abstract lacks one of the expected labels are skipped as a whole,
     * so the attributes of one entry are never combined with the name of another.
     *
     * @param args unused
     * @throws Exception if the page cannot be fetched or a row cannot be inserted
     */
    public static void main(String[] args) throws Exception {
        // The body of this response is not used (Jsoup downloads the page itself below),
        // so close it immediately to release the underlying connection.
        sendGet(PAGE_URL).close();

        Document document = Jsoup.parse(new URL(PAGE_URL), FETCH_TIMEOUT_MILLIS);

        // The title link carries both the movie name (text) and its URL (href).
        Elements titleLinks = document.select("div[class=title] a");
        Elements abstracts = document.select("div[class=abstract]");
        Elements ratings = document.select("span[class=rating_nums]");

        if (!abstracts.isEmpty()) {
            System.out.println(abstracts.get(0).text());
        }

        // NOTE(review): pairing by index assumes every list entry contributes exactly one
        // title link, one rating and one abstract - confirm against the page markup.
        int available = Math.min(titleLinks.size(), Math.min(abstracts.size(), ratings.size()));
        int inserted = 0;
        for (int i = 0; i < available && inserted < MAX_ROWS; i++) {
            String[] details = parseAbstract(abstracts.get(i).text());
            if (details == null) {
                continue; // incomplete abstract: skip the whole entry
            }
            connect(titleLinks.get(i).text(),
                    titleLinks.get(i).attr("href"),
                    ratings.get(i).text(),
                    details[0], details[1], details[2], details[3], details[4]);
            inserted++;
        }
    }

    /**
     * Splits the text of an abstract block into the values that follow each of
     * {@code ABSTRACT_LABELS}.
     *
     * @param text whitespace-normalized text of one abstract element
     * @return the trimmed values in label order (director, cast, genre, country, year),
     *         or {@code null} if a label is missing or the labels are out of order
     */
    private static String[] parseAbstract(String text) {
        int count = ABSTRACT_LABELS.length;
        int[] labelStart = new int[count];
        int searchFrom = 0;
        for (int k = 0; k < count; k++) {
            int at = text.indexOf(ABSTRACT_LABELS[k], searchFrom);
            if (at < 0) {
                return null;
            }
            labelStart[k] = at;
            searchFrom = at + ABSTRACT_LABELS[k].length();
        }

        String[] values = new String[count];
        for (int k = 0; k < count; k++) {
            // A value ends where the next label starts; the last one runs to the end of the text.
            int end = (k + 1 < count) ? labelStart[k + 1] : text.length();
            values[k] = valueAfterLabel(text.substring(labelStart[k], end), ABSTRACT_LABELS[k]);
        }
        return values;
    }

    /**
     * Returns the trimmed value of a segment that starts with {@code label}.
     * The value starts after the first colon (ASCII or full-width), which also skips a
     * label suffix such as "/地区"; without a colon it starts right after the label.
     */
    private static String valueAfterLabel(String segment, String label) {
        int colon = segment.indexOf(':');
        int wideColon = segment.indexOf('：');
        if (colon < 0 || (wideColon >= 0 && wideColon < colon)) {
            colon = wideColon;
        }
        int valueStart = (colon >= 0) ? colon + 1 : label.length();
        return segment.substring(valueStart).trim();
    }

    /**
     * Sends a GET request with a browser User-Agent header and returns the response.
     *
     * @param url address to request
     * @return the open response; the caller is responsible for closing it
     * @throws IOException if executing the request fails
     */
    public static CloseableHttpResponse sendGet(String url) throws IOException {
        HttpGet httpGet = new HttpGet(url);
        httpGet.setHeader("User-Agent", USER_AGENT);
        return HTTP_CLIENT.execute(httpGet);
    }

    /**
     * Inserts one row into the {@code move} table; the eight arguments are bound to the
     * eight placeholders of the insert statement in order. main() passes name, link,
     * rating, director, cast, genre, country and year.
     *
     * <p>A connection is opened for the call and closed again, together with the
     * statement, even when the insert fails.
     *
     * @throws Exception if the driver class cannot be loaded or the insert fails
     */
    public static void connect(String s1, String s2, String s3, String s4, String s5, String s6, String s7, String s8) throws Exception {
        // Register the JDBC driver.
        Class.forName(JDBC_DRIVER);

        // Open the connection.
        System.out.println("连接数据库...");
        // Fully qualified because the file's single-type import binds the simple name
        // Connection to the vendor class; the standard interface is all that is needed.
        try (java.sql.Connection conn = DriverManager.getConnection(JDBC_URL, JDBC_USER, JDBC_PASSWORD);
             PreparedStatement stmt = conn.prepareStatement(INSERT_SQL)) {
            String[] values = {s1, s2, s3, s4, s5, s6, s7, s8};
            for (int i = 0; i < values.length; i++) {
                stmt.setString(i + 1, values[i]);
            }
            System.out.println("连接成");
            stmt.executeUpdate();
        }
    }
}