Node.js爬取十九大报告标题链接

作为一名光荣的中共党员,我要时刻跟着党的步伐,学习党的政策。值此十九大结束之际,一定要做点有“意义”的事情,比如爬取十九大报告的各级标题及其链接,哈哈。

抓取和解析页面使用的是 superagent 和 cheerio,页面则由 express 配合 pug 模板渲染。GitHub 仓库地址是 https://github.com/Xuyuey/crawler

//app.js
var express = require('express');
var superagent = require('superagent');
var cheerio = require('cheerio');
var path = require('path');

// Create the Express application, use Pug templates from the "views"
// directory, and serve static files from the "public" directory.
var app = express();
var viewsDir = path.join(__dirname, '/views');
var staticDir = path.join(__dirname, '/public');

app.set('views', viewsDir);
app.set('view engine', 'pug');
app.use(express.static(staticDir));

/**
 * GET / - download the report page, collect its headings and render them.
 *
 * Each `section[level=1]` element becomes one entry of `items`:
 *   - name / href: text and id of the first `.anchor-tag` inside the section
 *   - subTopic:    one { subName, subHref } pair per `subsection` inside it
 * Links are built as "<page URL>#<anchor id>". A failed request is forwarded
 * to the Express error handler through `next`.
 */
app.get('/', function (req, res, next) {
	var reportUrl = 'http://piccache.cnki.net/index/images2009/other/2017/%E5%8D%81%E4%B9%9D%E5%A4%A7%E6%8A%A5%E5%91%8A/index.html';

	superagent.get(reportUrl).end(function (err, sres) {
		if (err) {
			return next(err);
		}

		var $ = cheerio.load(sres.text);
		var items = [];

		$('section[level=1]').each(function (sectionIndex, sectionEl) {
			var $section = $(sectionEl);
			var subTopic = [];

			$section.find('subsection').each(function (subIndex, subsectionEl) {
				// All anchors under the subsection: text() joins their text,
				// attr('id') reads the id of the first one.
				var $subAnchors = $(subsectionEl).find('.anchor-tag');

				subTopic.push({
					subName: $subAnchors.text(),
					subHref: reportUrl + '#' + $subAnchors.attr('id')
				});
			});

			// The first anchor inside the section supplies its own title and id.
			var $titleAnchor = $section.find('.anchor-tag').first();

			items.push({
				name: $titleAnchor.text(),
				href: reportUrl + '#' + $titleAnchor.attr('id'),
				subTopic: subTopic
			});
		});

		res.render('index', { items: items });
	});
});

// Start the HTTP server on port 3000 and log a message once it is listening.
var port = 3000;

app.listen(port, function onListening() {
	console.log('开始监听 3000端口');
});

//- index.pug
extend ./layout.pug

//- Table of contents: one entry per item, with its sub-topics (if any)
//- listed in a nested list underneath it.
block content
	//- "col-md-6" is the Bootstrap grid class; ".col.md-6" would produce the two
	//- unrelated classes "col" and "md-6".
	ul.col-md-6
		each item in items
			li
				a(href=item.href) #{item.name}
				//- The nested list lives inside its parent <li>, because a <ul> may only
				//- have <li> elements as children.
				if(item.subTopic.length>0)
					ul
						each subitem in item.subTopic
							li
								a(href=subitem.subHref) #{subitem.subName}



//- layout.pug
//- Base layout: page head (title, charset, Bootstrap stylesheet, small inline
//- style overrides) plus a "content" block that child templates fill in.
doctype html
html
	head
		title 中共十九大报告目录
		meta( http-equiv="Content-Type" content="text/html; charset=utf-8")
		link(rel="stylesheet" type="text/css" href="/bootstrap/dist/css/bootstrap.min.css")
		//- No ";" after a closing "}": a stray semicolon there is parsed as part of
		//- the next rule's selector, which makes that rule invalid and drops it.
		style.
			li{list-style:none}
			a{text-decoration:none;}
	body
		block content


我的项目目录



这就是爬到的标题

猜你喜欢

转载自blog.csdn.net/superxuyuey/article/details/78382915