Node.js 爬取 JavaScript 代码执行后的页面内容的示例代码

const puppeteer = require('puppeteer');
/**
 * Returns a promise that resolves (with no value) after `ms` milliseconds.
 *
 * @param {number} ms - Time to wait, in milliseconds.
 * @returns {Promise<void>} Promise settled by a `setTimeout` callback.
 */
const delay = (ms) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
var fs = require("fs")

// Address of the page that getHtml() loads on every iteration.
var url = "http://www.baidu.com";

// Upper bound for the iteration counter: getHtml() stops once its counter reaches this value.
var len = 999999999;
/**
 * Repeatedly loads `url` in a fresh headless browser and reads the rendered
 * HTML, starting at counter `ppp` and stopping once the counter reaches `len`.
 *
 * Every iteration launches its own browser and always closes it, even when a
 * Puppeteer call throws, so no browser process is left behind on failure.
 *
 * @param {number} ppp - Counter value to start from.
 * @returns {Promise<void>} Resolves once the counter reaches `len`; rejects
 *   with the first error raised by Puppeteer.
 */
async function getHtml(ppp) {
  // A loop replaces the previous un-awaited self-call: errors now propagate to
  // the caller instead of becoming unhandled rejections, and the parameter is
  // no longer reassigned.
  for (let index = ppp; ; index += 1) {
    console.log("正在处理: ", index);
    if (index >= len) return;

    const browser = await puppeteer.launch();
    try {
      const page = await browser.newPage();
      await page.goto(url, {waitUntil: 'domcontentloaded'});
      // Waits (up to 10 minutes) for a further navigation to finish; it does
      // not wait for a particular DOM element.
      // NOTE(review): goto() has already settled here, so this only resolves if
      // the page navigates again (e.g. a script-driven redirect); otherwise it
      // rejects after the timeout. Confirm this is intended, or consider
      // `waitUntil: 'networkidle0'` / `page.waitForSelector(...)` instead.
      await page.waitForNavigation({'timeout': 1000*300*2});
      // Serialized HTML of the page after its scripts have run.
      // TODO: consume `content` (e.g. log it or write it to disk).
      const content = await page.content();
    } finally {
      // Always release the browser process, including on errors/timeouts.
      await browser.close();
    }
    console.log("处理链接二");
  }
}


// Entry point: start at counter 0. The rejection handler keeps a failure from
// surfacing as an unhandled promise rejection and marks the process as failed.
getHtml(0).catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
发布了573 篇原创文章 · 获赞 747 · 访问量 1086万+

猜你喜欢

转载自blog.csdn.net/update7/article/details/105614908