# Draft of a selector-based rule format: two list entries, each with a `rule` key.
# NOTE(review): indentation is absent here, so the nesting of the keys that follow
# `rule:` is assumed rather than shown -- confirm before parsing this as YAML.
-
rule:
# `href` and `content` each describe one field: a selector, the attribute to read,
# a filter expression and a callback given as [name, argument].
href: { select: img:eq(0), attr: href, filter: -a -#footer img, callback: [regex, '/^abc(\d)$/'] }
content: { select: img:eq(0), attr: text, filter: -a -#footer img, callback: [func_name, attr]}
range: '#main>ul>li'
# In this entry `url` is an expression combining the two fields declared above.
url: 'concat(href, content)'
# `jump` names the `url` field; presumably the link to follow next -- TODO confirm.
jump: url
# Second entry: same as the first except that `url` is the `href` field on its own.
-
rule:
href: { select: img:eq(0), attr: href, filter: -a -#footer img, callback: [regex, '/^abc(\d)$/'] }
content: { select: img:eq(0), attr: text, filter: -a -#footer img, callback: [func_name, attr]}
range: '#main>ul>li'
url: href
jump: url
# Extraction rules for the Qidian source, grouped by resource under :book.
# The hash is serialized with to_yaml and handed to Source.create at the end.
rules = {
  book: {
    # Book list.
    # NOTE(review): ':page' in the URL looks like a page-number placeholder -- confirm
    # against the code that consumes these rules.
    list: {
      url: 'http://all.qidian.com/book/bookstore.aspx?PageIndex=:page',
      path: 'div.twoleft',
      item: {
        path: 'div.sw2,div.sw1',
        segments: {
          title: { path: 'div.swb/span.swbt/a', type: 'href' },
          last_chapter: { path: 'div.swb/a.hui2', type: 'href' },
          word_count: 'div.swc',
          author: { path: 'div.swd/a', type: 'href', save_url: false },
          last_updated_at: 'div.swe',
          # NOTE(review): this segment uses the key `category:` where its siblings use
          # `type:`; kept as written -- verify which key the rule consumer expects.
          category_id: { path: 'div.swa', pattern: 'SubCategoryId=(\d+)', category: 'regexp' }
        }
      },
      paging: { path: 'div.storelistbottom', current_page: 'a.f_s', pages: 'a.f_a' }
    },
    # Book details, such as images, announcements, comments, etc. (no rules yet)
    info: {},
    # Book chapter list (no rules yet)
    chapter: {},
    # Book body text (no rules yet)
    content: {}
  }
}

# Create the Qidian source entry, passing the rules as a YAML string.
Source.create(
  name: '起点中文网',
  code: 'qidian',
  url: 'http://qidian.com',
  rules: rules.to_yaml
)