package com.wilian.astro.webmagic.follifollie;
import com.wilian.astro.webmagic.vo.WatchVO;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
/**
*
* @ClassName: OfficialWebsitePageProcessor
* @Description: 官网数据
* @author PENGDI052
* @date 2018年4月16日
*
*/
public class OfficialWebsitePageProcessor implements PageProcessor {
// 官网手表入口页面&分页
// http://www.follifollie.com.cn/ch-ch/online-shop/watches/all#pg=1
// 产品详情页面
// http://www.follifollie.com.cn/ch-ch/online-shop/watches/jewelled/wf9a019bsw_xx-carousel-%E7%B3%BB%E5%88%97%E6%89%8B%E8%A1%A8
private static final String DETAIL_URL = "http://www.follifollie.com.cn/ch-ch/online-shop/watches/[a-zA-z]+/[\\w]+";
private static final String LIST_URL = "http://www.follifollie.com.cn/ch-ch/online-shop/watches/all#pg=[0-9]+";
private Site site = Site.me().setRetryTimes(3).setSleepTime(1000);
public void process(Page page) {
System.out.println("11");
if (page.getUrl().regex(LIST_URL).match()) {
page.addTargetRequests(page.getHtml().links().regex(DETAIL_URL).all());
}
else{
WatchVO watch = new WatchVO();
watch.setSource("官网");
watch.setUrl(page.getUrl().get());
watch.setBrand("Folli Follie");
watch.setSeries(page.getHtml().xpath("/html/body/div[1]/section[1]/div/div/div[3]/div/div/div[1]/div[2]/div/h1/text()").get());
System.out.println(watch.toString());
}
}
public Site getSite() {
return site;
}
public static void main(String[] args) {
Spider.create(new OfficialWebsitePageProcessor())
.addUrl(new String[]{"http://www.follifollie.com.cn/ch-ch/online-shop/watches/all#pg=1"})
.thread(5)
.run();
}
}
log4j.properties
# Root logger: level INFO, events are sent to the "stdout" appender only.
log4j.rootLogger=INFO, stdout
# "stdout": console appender writing to System.out with an INFO threshold
# and immediate flushing enabled.
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.Target=System.out
log4j.appender.stdout.Threshold=INFO
log4j.appender.stdout.ImmediateFlush=true
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
# Line format: timestamp with milliseconds, level padded to 5 characters,
# the "RequestId" value from the MDC, then the message and a line break.
log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %5p %X{RequestId} - %m%n
# "mongodb": despite its name this is also a ConsoleAppender on System.out,
# with a DEBUG threshold and the same line format as "stdout".
# NOTE(review): it is not attached to rootLogger in the lines above, so it is
# presumably inactive unless another logger references it - confirm.
log4j.appender.mongodb=org.apache.log4j.ConsoleAppender
log4j.appender.mongodb.Target=System.out
log4j.appender.mongodb.Threshold=DEBUG
log4j.appender.mongodb.ImmediateFlush=true
log4j.appender.mongodb.layout=org.apache.log4j.PatternLayout
log4j.appender.mongodb.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %5p %X{RequestId} - %m%n
package com.wilian.astro.webmagic.vo;
import java.util.Date;
/**
 * Value object describing one watch collected by a crawler.
 *
 * <p>All descriptive attributes are plain strings. The two {@link Date}
 * attributes are defensively copied on the way in and on the way out, so the
 * stored timestamps can only be changed through the setters.
 */
public class WatchVO {
    private String brand;      // brand
    private String imgUrl;     // image URL
    private String salePrice;  // current sale price
    private String discount;   // discount
    private String price;      // list price
    private String model;      // model, i.e. the concrete model within the brand
    private String style;      // type: quartz, ceramic, mechanical
    private String series;     // series
    private String source;     // data source
    private String url;        // URL the data came from
    private String stock;      // stock
    private Date createDate;   // creation time
    private Date updateDate;   // last update time

    /** Returns an independent copy of {@code date}, or {@code null} for {@code null}. */
    private static Date copyOf(Date date) {
        return date == null ? null : new Date(date.getTime());
    }

    /** @return the brand */
    public String getBrand() {
        return brand;
    }

    /** @param brand the brand */
    public void setBrand(String brand) {
        this.brand = brand;
    }

    /** @return the image URL */
    public String getImgUrl() {
        return imgUrl;
    }

    /** @param imgUrl the image URL */
    public void setImgUrl(String imgUrl) {
        this.imgUrl = imgUrl;
    }

    /** @return the current sale price */
    public String getSalePrice() {
        return salePrice;
    }

    /** @param salePrice the current sale price */
    public void setSalePrice(String salePrice) {
        this.salePrice = salePrice;
    }

    /** @return the discount */
    public String getDiscount() {
        return discount;
    }

    /** @param discount the discount */
    public void setDiscount(String discount) {
        this.discount = discount;
    }

    /** @return the list price */
    public String getPrice() {
        return price;
    }

    /** @param price the list price */
    public void setPrice(String price) {
        this.price = price;
    }

    /** @return the type (quartz, ceramic, mechanical) */
    public String getStyle() {
        return style;
    }

    /** @param style the type (quartz, ceramic, mechanical) */
    public void setStyle(String style) {
        this.style = style;
    }

    /** @return the data source */
    public String getSource() {
        return source;
    }

    /** @param source the data source */
    public void setSource(String source) {
        this.source = source;
    }

    /** @return the URL the data came from */
    public String getUrl() {
        return url;
    }

    /** @param url the URL the data came from */
    public void setUrl(String url) {
        this.url = url;
    }

    /** @return the stock */
    public String getStock() {
        return stock;
    }

    /** @param stock the stock */
    public void setStock(String stock) {
        this.stock = stock;
    }

    /** @return a copy of the creation time, or {@code null} if unset */
    public Date getCreateDate() {
        return copyOf(createDate);
    }

    /** @param createDate the creation time; copied, may be {@code null} */
    public void setCreateDate(Date createDate) {
        this.createDate = copyOf(createDate);
    }

    /** @return a copy of the last update time, or {@code null} if unset */
    public Date getUpdateDate() {
        return copyOf(updateDate);
    }

    /** @param updateDate the last update time; copied, may be {@code null} */
    public void setUpdateDate(Date updateDate) {
        this.updateDate = copyOf(updateDate);
    }

    /** @return the model within the brand */
    public String getModel() {
        return model;
    }

    /** @param model the model within the brand */
    public void setModel(String model) {
        this.model = model;
    }

    /** @return the series */
    public String getSeries() {
        return series;
    }

    /** @param series the series */
    public void setSeries(String series) {
        this.series = series;
    }

    /** @return all attributes in the form {@code WatchVO [brand=..., ..., updateDate=...]} */
    @Override
    public String toString() {
        return "WatchVO [brand=" + brand + ", imgUrl=" + imgUrl
                + ", salePrice=" + salePrice + ", discount=" + discount
                + ", price=" + price + ", model=" + model + ", style=" + style
                + ", series=" + series + ", source=" + source + ", url=" + url
                + ", stock=" + stock + ", createDate=" + createDate
                + ", updateDate=" + updateDate + "]";
    }
}