java爬虫中jsoup的使用

                                           java爬虫中jsoup的使用

jsoup可以用来解析HTML的内容,其功能非常强大,它可以像javascript那样直接从网页中提取有用的信息

例如1:

 从html字符串中解析数据

//直接从字符串中获取

    public static void getParByString()

    {

        String html = "<html><head><title> 这里是字符串内容</title></head"+ ">"+"<body><p class='p1'> 这里是 jsoup 作用的相关演示</p></body></html>";

       Document doc = Jsoup.parse(html);

       Elements links = doc.select("p[class]");

       for(Element link:links){

        String linkclass = link.className();

扫描二维码关注公众号,回复: 8017438 查看本文章

            String linkText = link.text();

            System.out.println(linkText);

            System.out.println(linkclass);

        }

    }

   从本地文件中解析数据

//从本地文件中获取

    public static void getHrefByLocal()

    {

        File input = new File("C:\\Users\\Idea\\Desktop\\html\\Home.html");

        Document doc = null;

        try {

            doc = Jsoup.parse(input,"UTF-8","http://www.oschina.net/");     //这里后面加了网址是为了解决后面绝对路径和相对路径的问题

              }

        catch (IOExceptione) {

            // TODO Auto-generated catch block            

         e.printStackTrace();

              }

        Elements links = doc.select("a[href]");

        for(Element link:links){

            String linkHref = link.attr("href");

            String linkText = link.text();

            System.out.println(linkText+":"+linkHref);

               }

        

    }

直接从网络上解析数据

public static HashMap getHrefByNet(String url)

    {    

      HashMap hm = new HashMap();

      String href = null;

         try {

            //这是get方式得到的

            Document doc = Jsoup.connect(url).get();

            String title = doc.title();

            Elements links = doc.select("a[href]");        

            for(Element link:links){              

                String linkHref = link.attr("abs:href");

                String linkText = link.text();

                //System.out.println(linkText+":"+linkHref);                

                hm.put(linkText, linkHref);

                href=linkText;

            }

            //System.out.println("***************");

            //另外一种是post方式

            /*@SuppressWarnings("unused")

            Document doc_Post = Jsoup.connect(url)

                    .data("query","Java")

                    .userAgent("I am jsoup")

                    .cookie("auth","token")

                    .timeout(10000)

                    .post();

            Elements links_Post = doc.select("a[href]");

             for(Element link:links_Post){

                    String linkHref = link.attr("abs:href");

                    String linkText = link.text();

                    //System.out.println(linkText+":"+linkHref);

                    

                    //map.put(linkText, linkHref);

                }*/          

        } catch (IOException e) {

            // TODO Auto-generated catch block            

            e.printStackTrace();

            hm.put("加载失败", "error");

        }         

        return hm ;

    }

   

注意:需要先把jsoup的jar包加入项目,然后导入以下包:

import org.jsoup.*;
import org.jsoup.nodes.*;
import org.jsoup.select.Elements;

最后附上jar包下载地址:

http://jsoup.org/packages/jsoup-1.8.1.jar
具体实际项目请看java爬虫实战项目

 循环遍历Hashtable中的键和值

// Create a sample table and put one entry into it.
Hashtable h = new Hashtable();
h.put(key, value);

// Visit every entry in turn, printing its value and then its key.
for (Object item : h.entrySet()) {
    Map.Entry entry = (Map.Entry) item;
    System.out.println(entry.getValue());
    System.out.println(entry.getKey());
}

 java文件夹的创建(先判断是否存在,如果不存在就创建)

//创建文件夹(如果不存在就创建,存在就不变)

     public void makedir(){

         //定义文件夹路径

         String filePath = "D://home//Lucy";

         File file = new File(filePath);

         if(!file.exists()&&!file.isDirectory())

         {

             System.out.println("不存在");

             file.mkdirs();  //创建文件夹,注意mkdirs()mkdir()的区别

             //判断是否创建成功

             if(file.exists()&&file.isDirectory())   //文件夹存在并且是文件夹             {

                 System.out.println("文件夹创建成功!");

             }

             else{

                 System.out.println("文件创建不成功!");

             }

         }

         else{

             System.out.println("文件已经存在!");

         }

         

     }

 java文件的创建(先判断是否存在,如果不存在就创建)

//创建文件,如果不存在就创建文件

     public void makeFile()

     {   

         String fileName = "D://file2.txt";

         File file = new File(fileName);

         if(!file.exists()&&!file.isFile())

         {

            try {

                if(file.createNewFile())  //创建文件,返回布尔值,如果成功为true,否则为false               

                   {

                    System.out.println("文件创建成功!");

                }

                }

               catch (IOException e) {

                // TODO Auto-generated catch block                

               e.printStackTrace();

            }

         }

         else{

          System.out.println("文件已经存在!");

          }

     }

在文件中写入内容

 //往文件中写入文本

     public void writeText(String s)

     {

         String fileName = "D://file2.txt";

        File file = new File(fileName);

        if(file.exists()&&file.isFile()) //如果文件存在,可以写入内容        

                     {

            FileOutputStream fos = null;

            try {

                fos = new FileOutputStream(fileName);

            }

                catch (FileNotFoundException e2) {

                // TODO Auto-generated catch block                

                e2.printStackTrace();

            }

            try {

                fos.write(s.getBytes());

            }

                catch (IOException e1) {

                // TODO Auto-generated catch block               

                e1.printStackTrace();

            }

            try {

                fos.close();

            }

                 catch (IOException e) {

                // TODO Auto-generated catch block                

                e.printStackTrace();

            }

        }

        else{

            System.out.println("文件不存在,不能写入内容");

        }

     }

 

java获取系统时间:

public static void getTime()

    {

        SimpleDateFormat f = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");  

        Date date = new Date();

        System.out.println(f.format(date));

        System.out.println(new SimpleDateFormat("yyyyMMdd日   HHmmss").format(date));

        System.out.println(date);

    }

java连接mysql数据库

   首先添加jar包:下载jar包 

/**
 * Small JDBC helper for a MySQL database.
 *
 * <p>Every call to {@link #excute(String)} or {@link #select(String, int, int)}
 * opens a fresh connection through {@link #init()} and closes all JDBC
 * resources again before returning. Failures are reported on standard
 * output / as stack traces rather than thrown.
 */
public class connectDoctorMySql {

    // Connection settings. Another environment only needs different values
    // here, e.g. "jdbc:mysql://192.168.0.16/hive" with user/password "hive".
    // NOTE(review): credentials are hard-coded in source; consider loading
    // them from configuration instead.
    public static final String url = "jdbc:mysql://127.0.0.1/orcl?useUnicode=true&characterEncoding=utf-8&useSSL=false";
    public static final String name = "com.mysql.jdbc.Driver";
    public static final String user = "root";
    public static final String password = "China123";

    public Connection conn = null;
    public PreparedStatement pst = null;
    public Statement stmt = null;
    ResultSet rs = null;

    /**
     * Loads the JDBC driver, opens a connection and creates a statement.
     *
     * <p>On failure a message and the stack trace are printed and
     * {@code conn} / {@code stmt} may remain {@code null}.
     */
    public void init() {
        // Forget handles from an earlier call so a failed attempt is
        // detectable through null instead of leaving stale closed objects.
        conn = null;
        stmt = null;
        try {
            Class.forName(name); // register the driver class
            conn = DriverManager.getConnection(url, user, password);
            stmt = conn.createStatement();
        } catch (Exception e) {
            System.out.println("数据库连接失败. . .");
            e.printStackTrace();
        }
    }

    /**
     * Runs an INSERT/UPDATE/DELETE or DDL statement.
     *
     * <p>If the connection could not be set up, nothing is executed. A failing
     * statement is printed together with its stack trace.
     *
     * @param sql statement text, executed as-is
     */
    public void excute(String sql) {
        init();
        try {
            if (stmt == null) {
                return; // init() has already reported the failure
            }
            stmt.executeUpdate(sql);
        } catch (SQLException e) {
            System.out.println("数据执行失败:" + sql); // show the offending SQL
            e.printStackTrace();
        } finally {
            closeResources();
        }
    }

    /**
     * Runs a query and returns two columns of every row.
     *
     * @param sql query text, executed as-is
     * @param x   1-based index of the column stored at position 0
     * @param y   1-based index of the column stored at position 1
     * @return list of {@code String[2]} rows; empty when the connection could
     *         not be set up, and holding only the rows read so far when the
     *         query fails part-way
     */
    public ArrayList select(String sql, int x, int y) {
        init();
        ArrayList<String[]> result = new ArrayList<String[]>();
        try {
            if (stmt == null) {
                return result; // init() has already reported the failure
            }
            // Stored in the field (not a shadowing local) so that
            // closeResources() really closes this result set.
            rs = stmt.executeQuery(sql);
            while (rs.next()) {
                String[] row = new String[2];
                row[0] = rs.getString(x);
                row[1] = rs.getString(y);
                result.add(row);
            }
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            closeResources();
        }
        return result;
    }

    /**
     * Closes result set, statements and connection. Each resource is closed
     * in its own try block so one failure does not keep the others open.
     */
    private void closeResources() {
        if (rs != null) {
            try {
                rs.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
        if (pst != null) {
            try {
                pst.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
        if (stmt != null) {
            try {
                stmt.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }
}

java连接oracle数据库

/**
 * Small JDBC helper for an Oracle database.
 *
 * <p>{@link #start()}, {@link #excute(String)} and
 * {@link #select(String, int, int)} each open a fresh connection through
 * {@link #init()} and close all JDBC resources again before returning.
 * Failures are reported on standard output / as stack traces rather than
 * thrown.
 */
public class connectDoctor {

    // Oracle connection settings.
    // NOTE(review): credentials are hard-coded in source; consider loading
    // them from configuration instead.
    public static final String url = "jdbc:oracle:thin:@127.0.0.1:1521:orcl";
    public static final String name = "oracle.jdbc.driver.OracleDriver";
    public static final String user = "c238891";
    public static final String password = "Rapid111";

    public Connection conn = null;
    public PreparedStatement pst = null;
    public Statement stmt = null;
    ResultSet rs = null;

    /**
     * Loads the JDBC driver, opens a connection and creates a statement.
     *
     * <p>On failure a message and the stack trace are printed and
     * {@code conn} / {@code stmt} may remain {@code null}.
     */
    public void init() {
        // Forget handles from an earlier call so a failed attempt is
        // detectable through null instead of leaving stale closed objects.
        conn = null;
        stmt = null;
        try {
            Class.forName(name); // register the driver class
            conn = DriverManager.getConnection(url, user, password);
            stmt = conn.createStatement();
        } catch (Exception e) {
            // This path is a connection failure, not a failed insert.
            System.out.println("数据库连接失败. . .");
            e.printStackTrace();
        }
    }

    /**
     * Connection smoke test: prints every row of the {@code emp} table.
     */
    public void start() {
        init();
        String sql = "select * from emp";
        try {
            if (conn == null) {
                return; // init() has already reported the failure
            }
            pst = conn.prepareStatement(sql);
            rs = pst.executeQuery();
            while (rs.next()) {
                System.out.println("编号:" + rs.getString("empno")
                        + ";姓名:" + rs.getString("ename")
                        + "; 工作:" + rs.getString("job")
                        + "; 领导:" + rs.getString("mgr")
                        + "; 雇佣日期:" + rs.getString("hiredate")
                        + "; 工资:" + rs.getString("sal")
                        + "; 奖金:" + rs.getString("comm")
                        + "; 部门:" + rs.getString("deptno"));
            }
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            // Statement and connection are released even when no result set
            // was ever obtained.
            closeResources();
        }
    }

    /**
     * Runs an INSERT/UPDATE/DELETE or DDL statement.
     *
     * <p>If the connection could not be set up, nothing is executed. A failing
     * statement is printed together with its stack trace.
     *
     * @param sql statement text, executed as-is
     */
    public void excute(String sql) {
        init();
        try {
            if (stmt == null) {
                return; // init() has already reported the failure
            }
            stmt.executeUpdate(sql);
        } catch (SQLException e) {
            System.out.println(sql); // show the offending SQL
            e.printStackTrace();
        } finally {
            closeResources();
        }
    }

    /**
     * Runs a query and returns two columns of every row.
     *
     * @param sql query text, executed as-is
     * @param x   1-based index of the column stored at position 0
     * @param y   1-based index of the column stored at position 1
     * @return list of {@code String[2]} rows; empty when the connection could
     *         not be set up, and holding only the rows read so far when the
     *         query fails part-way
     */
    public ArrayList select(String sql, int x, int y) {
        init();
        ArrayList<String[]> result = new ArrayList<String[]>();
        try {
            if (stmt == null) {
                return result; // init() has already reported the failure
            }
            // Stored in the field (not a shadowing local) so that
            // closeResources() really closes this result set.
            rs = stmt.executeQuery(sql);
            while (rs.next()) {
                String[] row = new String[2];
                row[0] = rs.getString(x);
                row[1] = rs.getString(y);
                result.add(row);
            }
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            closeResources();
        }
        return result;
    }

    /**
     * Closes result set, statements and connection. Each resource is closed
     * in its own try block so one failure does not keep the others open.
     */
    private void closeResources() {
        if (rs != null) {
            try {
                rs.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
        if (pst != null) {
            try {
                pst.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
        if (stmt != null) {
            try {
                stmt.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }
}

好文要顶 关注我 收藏该文  

猜你喜欢

转载自www.cnblogs.com/qingbai/p/11958814.html