Java爬虫模拟jQuery点击

package com.teamdev.jxbrowser.chromium.demo_sanya12.xiecheng.evment; 

import java.awt.BorderLayout; 
import java.sql.PreparedStatement; 
import java.sql.ResultSet; 
import java.sql.SQLException; 
import java.sql.Statement; 
import java.sql.Timestamp; 
import java.util.Random; 
import java.util.concurrent.CountDownLatch; 
import java.util.concurrent.TimeUnit; 
import java.util.logging.Level; 
import javax.swing.JFrame; 
import javax.swing.WindowConstants; 
import org.jsoup.Jsoup; 
import org.jsoup.nodes.Document; 
import org.jsoup.nodes.Element; 
import org.jsoup.select.Elements; 
import com.hyjx.common.CommonUtil; 
import com.hyjx.orclJdbcUtil.JDBCUtils; 
import com.hyjx.xcUtil.XcTool; 
import com.teamdev.jxbrowser.chromium.Browser; 
import com.teamdev.jxbrowser.chromium.BrowserPreferences; 
import com.teamdev.jxbrowser.chromium.JSValue; 
import com.teamdev.jxbrowser.chromium.LoggerProvider; 
import com.teamdev.jxbrowser.chromium.events.FinishLoadingEvent; 
import com.teamdev.jxbrowser.chromium.events.LoadAdapter; 
import com.teamdev.jxbrowser.chromium.swing.BrowserView; 
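/**
 * Crawls hotel comments from Ctrip (携程) with an embedded JxBrowser window:
 * reads the hotel URLs from the database, opens each page, and walks through
 * the comment pages by simulating clicks with JavaScript.
 */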
public class sy_good_xc_comment { 
public static void main(String[] args) throws Exception { 
//初始化浏览器 
LoggerProvider.getBrowserLogger().setLevel(Level.SEVERE); 
LoggerProvider.getIPCLogger().setLevel(Level.SEVERE); 
LoggerProvider.getChromiumProcessLogger().setLevel(Level.SEVERE); 

        final Browser browser = new Browser(); 
        BrowserView browserView = new BrowserView(browser); 
        BrowserPreferences preferences = browser.getPreferences(); 
        preferences.setImagesEnabled(false); 
        
        JFrame frame = new JFrame(); 
        frame.setDefaultCloseOperation(WindowConstants.EXIT_ON_CLOSE); 
        frame.add(browserView, BorderLayout.CENTER); 
        frame.setSize(700, 800); 
        frame.setLocationRelativeTo(null); 
        frame.setVisible(true); 

        
        //创建数据库 
    java.sql.Connection conOrcale  = null; 
try { 
conOrcale =    JDBCUtils.getConnection(); 
} catch (SQLException e1) { 
e1.printStackTrace(); 

String sql = null; 
PreparedStatement ps = null; 
String  sql1 = null; 
String  sql2 = null; 
Statement st2 = null; 
ResultSet rs2 = null; 
Document doc = null; 
String url = null; 
try{ 
//艺龙 
    sql1 = " select * from a_a_nm_good_12   where  plat_name='携程' and type ='酒店' and state is null "; 
    sql2 = " update  a_a_nm_good_12 set state = '评论已抓' where good_id = ? "; 
    ps = conOrcale.prepareStatement(sql2); 
    st2  = (java.sql.Statement) conOrcale.createStatement(); 
rs2  = st2.executeQuery(sql1); 
}catch (Exception e) { 
e.printStackTrace(); 

int i = 1; 
for(;rs2.next();){ 

System.out.println(i); 
i++; 
final  String good_url = rs2.getString("good_url"); 
String good_id = rs2.getString("good_id"); 
String shop_id = rs2.getString("shop_id"); 

search( good_url,shop_id,good_id, frame, browser); 
ps.setString(1,good_id); 
//ps.executeUpdate(); 



static void search(final String good_url, String shop_id,String good_id ,JFrame frame, final Browser browser)throws Exception{ 

java.sql.Connection conOrcale  = null; 
try { 
conOrcale =    JDBCUtils.getConnection(); 
} catch (SQLException e1) { 
e1.printStackTrace(); 

String sql = null; 
PreparedStatement ps2 = null; 

try{ 
sql = " insert into a_a_nm_evment_12  "+ 
"   (ev_id, shop_id,  ev_time, ev_user, ev_content,good_id)  "+ 
"  values   "+ 
"  (? ,   ?   ,   ?    ,   ?   , ? ,? )   "; 
    ps2 = conOrcale.prepareStatement(sql); 
}catch (Exception e) { 
e.printStackTrace(); 

System.out.println("good_url:"+good_url); 
invokeAndWaitReady(browser, new Runnable() { 
    public void run() { 
    browser.loadURL(good_url); 
    //browser.loadURL("http://hotel.elong.com/sanya/90574280/#review"); 
    } 
}); 

try { 
Thread.sleep(1000*1); 
} catch (InterruptedException e1) { 
e1.printStackTrace(); 

// //设置滚动条滚动速度 
//         for(int i = 1;i<10;i++){ 
//         browser.executeJavaScriptAndReturnValue("window.scrollTo(100,"+i*200+")"); 
//             try { 
//     Thread.sleep(200*1); 
//     } catch (InterruptedException e1) { 
//     e1.printStackTrace(); 
//     } 
//         } 
    
        
        int num = 1; 
        //得到html和document对象 
        String html = browser.getHTML(); 
        Document document = Jsoup.parse(html); 
        int cooment_view=document.getElementsByAttributeValue("id", "id_comment_view").size(); 
        if(cooment_view>0){ 
        browser.executeJavaScriptAndReturnValue("document.getElementsByClassName('comment_view')[0].click();"); 
        try { 
     Thread.sleep(2000*1); 
     } catch (InterruptedException e1) { 
     e1.printStackTrace(); 
     } 
     browser.executeJavaScriptAndReturnValue("document.getElementsByClassName('select_sort')[0].options[1].selected=true"); 
             browser.executeJavaScriptAndReturnValue("document.getElementById('cPageBtn').click()"); 
        try { 
     Thread.sleep(2000*1); 
     } catch (InterruptedException e1) { 
     e1.printStackTrace(); 
     } 
        //重新得到html和document对象 
         html = browser.getHTML(); 
         document = Jsoup.parse(html); 
         
        Elements elements = document.getElementsByAttributeValue("class", "comment_block J_asyncCmt"); 
        System.out.println("elements.size()"+elements.size());//输入条数 
        String EV_TIME=""; 
        String EV_USER=""; 
        String EV_CONTENT=""; 
        String shijian =""; 
        for(Element e : elements){ 
        Document parse = Jsoup.parse(e.html()); 
String EV_NUM = null; 
try { 
EV_USER = parse.getElementsByAttributeValue("class","name").text(); 
} catch (Exception e4) { 
// TODO Auto-generated catch block 
EV_USER = null; 

try { 
EV_CONTENT = parse.getElementsByAttributeValue("class","J_commentDetail").text(); 
} catch (Exception e3) { 
// TODO Auto-generated catch block 
EV_CONTENT = null; 

try { 
EV_TIME =parse.getElementsByAttributeValue("class","time").text(); 
if(EV_TIME.contains("发表于")){ 
EV_TIME=EV_TIME.replace("发表于", ""); 

} catch (Exception e2) { 
// TODO Auto-generated catch block 
EV_TIME = null; 

        shijian = EV_TIME.substring(0,7); 
if( (shijian).equals("2016-12") ){ 
System.out.println("good_url        "+good_url); 
System.out.println("评论内容      "+EV_CONTENT); 
System.out.println("评论人        "+EV_USER); 
System.out.println("评论时间         "+EV_TIME); 
System.out.println("评论分数        "+EV_NUM); 
        try { 
        //(ev_id, shop_id,  ev_time, ev_user, ev_content,good_id)  "+ 
ps2.setString(1, CommonUtil.getUUID32()); 
ps2.setString(2, shop_id); 
ps2.setString(3, EV_TIME); 
ps2.setString(4, EV_USER); 
ps2.setString(5, EV_CONTENT); 
ps2.setString(6, good_id); 
//ps2.executeUpdate(); 
} catch (Exception e1) { 
// TODO Auto-generated catch block 
e1.printStackTrace(); 

        System.out.println("---------------------------------------------------------------------------------------------"); 
       }else{return;} 
        } 
        //判断是否有下一页,如果有下一页,继续插入数据 
             while(jumpNext(browser)){ 
             num++; 
             //超过20页的不抓 
//              if(num>=20){ 
//              break; 
//              } 
             try { 
        Thread.sleep(2000*1); 
        } catch (InterruptedException e1) { 
        e1.printStackTrace(); 
        } 
//              for(int i = 1;i<10;i++){ 
//             browser.executeJavaScriptAndReturnValue("window.scrollTo(100,"+i*200+")"); 
//                 try { 
//         Thread.sleep(200*1); 
//         } catch (InterruptedException e1) { 
//         e1.printStackTrace(); 
//         } 
//             } 
            html = browser.getHTML(); 
            document = Jsoup.parse(html); 
             Elements elementss = document.getElementsByAttributeValue("class", "comment_block J_asyncCmt"); 
            System.out.println("elements.size()"+elementss.size());//输入条数 
            for(Element e : elementss){ 
            Document parse = Jsoup.parse(e.html()); 
            try { 
    EV_USER = parse.getElementsByAttributeValue("class","name").text(); 
    } catch (Exception e4) { 
    // TODO Auto-generated catch block 
    EV_USER = null; 
    } 
    try { 
    EV_CONTENT = parse.getElementsByAttributeValue("class","J_commentDetail").text(); 
    } catch (Exception e3) { 
    // TODO Auto-generated catch block 
    EV_CONTENT = null; 
    } 
    try { 
    EV_TIME =parse.getElementsByAttributeValue("class","time").text(); 
    if(EV_TIME.contains("发表于")){ 
    EV_TIME=EV_TIME.replace("发表于", ""); 
    } 
    } catch (Exception e2) { 
    // TODO Auto-generated catch block 
    EV_TIME = null; 
    } 
          shijian = EV_TIME.substring(0,7); 
        if( (shijian).equals("2016-12") ){ 
        System.out.println("客体url:"+good_url); 
System.out.println("评论内容:"+EV_CONTENT); 
System.out.println("评论人: "+EV_USER); 
System.out.println("评论时间:"+EV_TIME); 
            try { 
            ps2.setString(1, CommonUtil.getUUID32()); 
ps2.setString(2, shop_id); 
ps2.setString(3, EV_TIME); 
ps2.setString(4, EV_USER); 
ps2.setString(5, EV_CONTENT); 
ps2.setString(6, good_id); 
    //ps2.executeUpdate(); 
    } catch (Exception e1) { 
    // TODO Auto-generated catch block 
    e1.printStackTrace(); 
    } 
            System.out.println("---------------------------------------------------------------------------------------------"); 
            }else{return;} 
            } 
             }    
    }else{ 
    return; 
    } 
    
    } 

/** 
* 首先判断是否有下一页按钮 
* @param browser 
* @return 
*/ 
public static boolean jumpNext(Browser browser){ 

  String html = browser.getHTML(); 
      Document document = Jsoup.parse(html); 
      int num_page_next=0; 
  try { 
num_page_next = document.getElementsByAttributeValue("class", "c_down").size(); 
      } catch (Exception e) { 
return false; 
  } 
      if(num_page_next>0){ 
      System.out.println("点击下一页"); 
      browser.executeJavaScriptAndReturnValue("document.getElementsByClassName('c_down')[0].click()"); 
      //browser.executeJavaScriptAndReturnValue("document.getElementsById('comment_paging').getElementsByClassName('page_next')[0].click()");  
      //  browser.executeJavaScriptAndReturnValue("document.select('#comment_paging').getElementsByTagName('a')["+(num_pagesize-1)+"].click()"); 
      }else{ 
      return false; 
      } 
    return true; 


// JSValue nextButtonNum = browser.executeJavaScriptAndReturnValue("document.getElementsByClassName('h-pagination pb20')[0].getElementsByClassName('a')[0].length"); 
//
// System.out.println("下一页按钮数量:"+nextButtonNum.getNumber()); 
// int num = ((Number)nextButtonNum.getNumber()).intValue(); 
// if(num == 0){ 
// return false; 
// } 
// browser.executeJavaScriptAndReturnValue("document.getElementsByClassName('h-pagination pb20')[0].getElementsByClassName('a')[0].click()"); 
//    return true; 
//} 
public static void invokeAndWaitReady(Browser browser, Runnable runnable) { 
    final CountDownLatch latch = new CountDownLatch(1); 
    LoadAdapter listener = new LoadAdapter() { 
        @Override 
        public void onFinishLoadingFrame(FinishLoadingEvent event) { 
            if (event.isMainFrame()) { 
                latch.countDown(); 
            } 
        } 
    }; 
    browser.addLoadListener(listener); 
    try { 
        runnable.run(); 
        try { 
            if (!latch.await(60, TimeUnit.SECONDS)) { 
                //throw new RuntimeException(new TimeoutException()); 
            } 
        } catch (InterruptedException ignore) { 
        ignore.printStackTrace(); 
            Thread.currentThread().interrupt(); 
        } 
    } finally { 
        browser.removeLoadListener(listener); 
    } 



}

猜你喜欢

转载自blog.csdn.net/W_DongQiang/article/details/81907114