作为一名光荣的中共党员,我们要时刻跟着党的步伐,学习党的政策。值此十九大结束之际,一定要做点有“意义”的事情,比如爬一下十九大报告的题目,哈哈。
这里使用的是 superagent 和 cheerio,GitHub 仓库的地址是 https://github.com/Xuyuey/crawler
//app.js
// Express server that fetches a remote HTML page with superagent, extracts a
// two-level list of headings with cheerio, and renders it through the `index`
// Pug view on GET /.
var express = require('express');
var superagent = require('superagent');
var cheerio = require('cheerio');
var path = require('path');

var app = express();

app.set('views', path.join(__dirname, '/views'));
app.set('view engine', 'pug');
app.use(express.static(path.join(__dirname, '/public')));

// Page that is scraped; also used as the base of every generated link.
var oUrl = 'http://piccache.cnki.net/index/images2009/other/2017/%E5%8D%81%E4%B9%9D%E5%A4%A7%E6%8A%A5%E5%91%8A/index.html';

/**
 * Reads the first `.anchor-tag` element found inside a cheerio selection.
 *
 * Only the first match is used for both the text and the id, so the label and
 * the link always describe the same element (`.text()` on several matches
 * would concatenate all of them, while `.attr()` reads only the first).
 *
 * @param {Object} $scope - cheerio selection to search in.
 * @returns {{text: string, href: string}} Anchor text and a link into `oUrl`;
 *   when the anchor has no id the link is `oUrl` without a fragment.
 */
function readAnchor($scope) {
  var $anchor = $scope.find('.anchor-tag').first();
  var id = $anchor.attr('id');
  return {
    text: $anchor.text(),
    // Avoid producing a "...#undefined" link when the id is missing.
    href: id ? oUrl + '#' + id : oUrl
  };
}

/**
 * GET / - scrape `oUrl` and render the collected headings.
 * Request errors from superagent are forwarded to Express via `next`.
 */
app.get('/', function (req, res, next) {
  superagent.get(oUrl).end(function (err, sres) {
    if (err) return next(err);

    var $ = cheerio.load(sres.text);
    var items = [];

    $('section[level=1]').each(function (index, el) {
      var $el = $(el);
      var subTopic = [];

      $el.find('subsection').each(function (subIndex, subel) {
        var sub = readAnchor($(subel));
        subTopic.push({
          subName: sub.text,
          subHref: sub.href
        });
      });

      // The section's own anchor is the first `.anchor-tag` in document
      // order; later matches may belong to its subsections.
      var top = readAnchor($el);
      items.push({
        name: top.text,
        href: top.href,
        subTopic: subTopic
      });
    });

    res.render('index', {
      items: items
    });
  });
});

app.listen(3000, function () {
  console.log('开始监听 3000端口');
});
//- index.pug
//- Renders `items` (passed by the route handler) as a nested list: one link
//- per item, followed by a sub-list of links when `item.subTopic` is non-empty.
extends ./layout.pug

block content
  //- `col-md-6` is the Bootstrap grid class; `ul.col.md-6` would emit the two
  //- separate classes `col` and `md-6` instead.
  ul.col-md-6
    each item in items
      li
        a(href=item.href) #{item.name}
        if item.subTopic.length > 0
          ul
            each subitem in item.subTopic
              li
                a(href=subitem.subHref) #{subitem.subName}
//- layout.pug
//- Base layout: declares the document head (title, charset, Bootstrap
//- stylesheet, two inline CSS rules) and exposes a `content` block that
//- extending templates fill in.
doctype html
html
  head
    title 中共十九大报告目录
    meta(http-equiv="Content-Type" content="text/html; charset=utf-8")
    link(rel="stylesheet" type="text/css" href="/bootstrap/dist/css/bootstrap.min.css")
    //- No `;` between the rules: a stray semicolon becomes part of the next
    //- rule's selector and makes the browser discard that rule.
    style.
      li{list-style:none}
      a{text-decoration:none;}
  body
    block content
我的项目目录