下面的代码比较混乱,只是实现了功能,引入的包也有些混用;它利用上一篇文章中获取到的各个详情页的 id,依次抓取每个详情页面的数据。
其中使用 cheerio 对页面的 HTML 内容进行解析和提取。
最后,同样把数据以 JSON 文件的形式写入本地文件夹中。
// 'use strict';
const puppeteer = require('puppeteer');
const requestSys = require('request');
const cheerio = require('cheerio');
const fs = require('fs');
const detailIdList = require('./data/detail/detailIdList.json');
// Base URL(s) of the recipe detail pages; a page id plus ".html" is appended
// to the first entry to build the address of a single recipe.
const urlArr = ['http://www.beitaichufang.com/recipe/'];
/**
 * Promise-based delay.
 *
 * @param {number} time - Delay in milliseconds, passed straight to setTimeout.
 * @returns {Promise<undefined>} Resolves (with no value) once the timer fires.
 */
const sleep = (time) =>
  new Promise((done) => {
    setTimeout(done, time);
  });
/**
 * Opens the detail page for `id` in a freshly launched browser, loads the
 * rendered HTML into cheerio and passes it to `setDetailItem` for extraction
 * and saving.
 *
 * Failures are logged instead of thrown, so the returned promise always
 * resolves; callers that ignore the return value keep working unchanged.
 *
 * @param {string|number} id - Detail page id (presumably a string or number
 *   from detailIdList.json); the page URL is `${urlArr[0]}${id}.html`.
 * @returns {Promise<void>} Settles after the browser (if any) has been closed.
 */
async function gotoDetailPage(id) {
  let browser;
  try {
    // Pause before launching; presumably throttles requests to the site.
    await sleep(3000);
    browser = await puppeteer.launch({
      ignoreHTTPSErrors: true,
      headless: false,
      args: ['--no-sandbox', '--disable-setuid-sandbox'],
      devtools: true,
      timeout: 0,
    });
    const page = await browser.newPage();
    // 'networkidle2' waits until the page has (almost) stopped making requests.
    await page.goto(`${urlArr[0]}${id}.html`, { waitUntil: 'networkidle2' });
    const html = await page.content();
    const $ = cheerio.load(html);
    setDetailItem($, id);
    // Keep the browser open a little longer before closing, as the original did.
    await sleep(3000);
  } catch (e) {
    console.log(e);
  } finally {
    // Always release the browser, even when navigation or parsing failed.
    // `browser` is still undefined when the launch itself failed.
    if (browser) {
      try {
        await browser.close();
      } catch (closeError) {
        console.log(closeError);
      }
    }
  }
}
/**
 * Debug pass-through: prints the value with a marker and returns it as-is.
 * No whitespace stripping or other normalisation is applied.
 *
 * @param {*} str - Value to log (typically text or an attribute read from the page).
 * @returns {*} The same value that was passed in.
 */
function regString(str) {
  const marker = '==================str';
  console.log(str, marker);
  return str;
}
/**
 * Extracts the recipe fields from a loaded detail page and writes them as
 * pretty-printed JSON to `./data/<id>.json`.
 *
 * @param {Function} $ - cheerio instance loaded with the detail page HTML.
 * @param {string|number} id - Detail page id, used as the output file name.
 * @returns {void} The file is written asynchronously through a stream.
 */
function setDetailItem($, id) {
  console.log('进入到页面解析。。。');

  // Key names (including the spelling of `converUrl`) are part of the written
  // data format and are kept exactly as they were.
  const detail = {};
  detail.converUrl = regString($('#menu-img').attr('src')); // cover image
  detail.name = regString($('#menu-name').text()); // title
  detail.desc = regString($('#chef-intro').text()); // description

  // One entry per cooking step.
  const steps = [];
  $('#menu-step .item').each((_, element) => {
    const stepNode = $(element);
    steps.push({
      step: regString(stepNode.find('.item-title').text()),
      intro: regString(stepNode.find('.item-intro').text()),
      imgUrl: stepNode.find('img').attr('src'),
    });
  });
  console.info(steps);
  detail.stepList = steps;

  detail.makeTime = regString($('#menu-time').text()); // preparation time
  detail.level = regString($('#menu-degree').text()); // difficulty
  detail.texture = regString($('#menu-taste').text()); // taste / texture

  // Persist the result. Without an 'error' listener a failed write would be
  // raised as an uncaught exception and abort the whole crawl.
  const writerStream = fs.createWriteStream(`./data/${id}.json`);
  writerStream.on('error', (err) => {
    console.log(err);
  });
  writerStream.write(JSON.stringify(detail, null, 2), 'UTF8');
  writerStream.end();
  console.log('结束');
}
/**
 * Fetches the data of every detail page listed in `detailIdList`.
 *
 * A timer fires every 15 seconds; each tick starts the crawl of the next id
 * without waiting for the previous one to finish. The timer is cleared on the
 * first tick after the list has been exhausted.
 */
function getDetailData() {
  let nextIndex = 0;

  const onTick = () => {
    if (nextIndex >= detailIdList.length) {
      clearInterval(timer);
    } else {
      gotoDetailPage(detailIdList[nextIndex]);
    }
    nextIndex += 1;
  };

  const timer = setInterval(onTick, 15000);
}
// Script entry point: start crawling the detail pages when this file is run.
getDetailData()