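// Note: the third-party packages imported below (node-fetch, selenium-webdriver) need to be installed before running this script.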
import {
createWriteStream
} from "node:fs";
import {
pipeline
} from "node:stream";
import {
promisify
} from "node:util";
import fetch from "node-fetch";
import {
Builder
} from "selenium-webdriver";
import * as chrome from "selenium-webdriver/chrome.js";
// Location of the chromedriver executable. The CHROMEDRIVER_PATH environment
// variable, when set to a non-empty value, overrides the default so the script
// is not tied to one machine's directory layout. `||` (not `??`) is deliberate:
// an empty environment variable should fall back to the default too.
const chromedriverPath =
  process.env.CHROMEDRIVER_PATH || "C:\\WebDri\\bin\\chromedriver.exe";
const service = new chrome.ServiceBuilder(chromedriverPath);
// Shared Chrome WebDriver session used by the rest of this file.
const driver = new Builder()
  .forBrowser("chrome")
  .setChromeService(service)
  .build();
// Example: open a page
// driver.get("https://www.bilibili.com");
// setTimeout(() => {
// // Close the browser
// driver.quit();
// }, 5000);
/**
 * Download helper: fetches `url` and streams the response body into the file
 * at `filename`.
 *
 * @param {string} url - Address of the resource to fetch.
 * @param {string} filename - Path of the file the body is written to.
 * @returns {Promise<void>} Settles once the body has been piped into the file.
 * @throws {Error} When the response is not OK (`response.ok` is false).
 */
async function download(url, filename) {
  const res = await fetch(url);
  if (res.ok) {
    // Promise-returning wrapper around the callback-style stream.pipeline.
    const pipe = promisify(pipeline);
    const source = res.body;
    const sink = createWriteStream(filename);
    await pipe(source, sink);
    return;
  }
  throw new Error(`unexpected response ${res.statusText}`);
}
// Listing pages to scrape: the index page first, then index_2.html through
// index_10.html.
let urls = Array.from({ length: 10 }, (_, position) => {
  const pageNumber = position + 1;
  return pageNumber === 1
    ? "https://www.woyaogexing.com/touxiang/index.html"
    : `https://www.woyaogexing.com/touxiang/index_${pageNumber}.html`;
});
// CSS selector matching the avatar links on a listing page.
const AVATAR_LINK_SELECTOR =
  "#main > div.list-main.mt10.cl > div.list-left.z > div.pMain > div > a.img";
// Maximum time to wait for the avatar links to show up before giving up on a
// page; without a timeout driver.wait() would block forever.
const PAGE_READY_TIMEOUT_MS = 30000;
// Pause between pages, to behave a little more like a human visitor.
const PAGE_DELAY_MS = 1000;

/**
 * Build the destination path for an image from its title.
 * Characters that are invalid in file names (path separators, `:`, `?`, ...)
 * are replaced with `_`; an empty title falls back to "untitled".
 *
 * @param {string} title - Title attribute collected from the page.
 * @returns {string} Path of the form `./imgs/<name>.jpg`.
 */
function toImagePath(title) {
  const safeName = String(title ?? "")
    .replace(/[\\\/:*?"<>|]/g, "_")
    .trim();
  return `./imgs/${safeName || "untitled"}.jpg`;
}

/**
 * Visit every page in the module-level `urls` list with the shared `driver`,
 * collect the image source and title of each avatar link, and save the images
 * under ./imgs/ via `download`.
 *
 * The ./imgs/ directory is not created here, so it has to exist beforehand.
 * A failed download is logged and does not stop the run.
 *
 * @returns {Promise<void>} Resolves once all pages have been processed.
 * @throws {Error} When a page cannot be loaded or no avatar links appear
 *   within PAGE_READY_TIMEOUT_MS.
 */
async function main() {
  const selectorLiteral = JSON.stringify(AVATAR_LINK_SELECTOR);

  for (const pageUrl of urls) {
    console.log("页面: " + pageUrl);
    await driver.get(pageUrl);

    // Wait until the page actually contains avatar links.
    await driver.wait(
      () =>
        driver.executeScript(
          `return document.querySelectorAll(${selectorLiteral}).length > 0;`,
        ),
      PAGE_READY_TIMEOUT_MS,
      `no avatar links found on ${pageUrl}`,
    );

    // Collect the image URL and title of every avatar link (runs in the browser).
    const list = await driver.executeScript(`
      const links = [...document.querySelectorAll(${selectorLiteral})];
      return links.map((item) => ({
        src: item.firstChild?.src,
        title: item.title,
      }));
    `);
    console.log(list);

    // Links whose first child carries no src cannot be downloaded; skip them.
    const downloadable = list.filter(
      (item) => typeof item.src === "string" && item.src !== "",
    );

    // Wait for every download to settle so no promise is left floating and a
    // single failure cannot crash the process with an unhandled rejection.
    const outcomes = await Promise.allSettled(
      downloadable.map((item) => download(item.src, toImagePath(item.title))),
    );
    outcomes.forEach((outcome, index) => {
      if (outcome.status === "rejected") {
        console.error(
          `download failed: ${downloadable[index].src}`,
          outcome.reason,
        );
      }
    });

    await driver.sleep(PAGE_DELAY_MS);
  }
}
// Run the scraper. Errors are reported instead of being left as an unhandled
// rejection, and the browser session is always shut down so no chromedriver /
// Chrome processes are left behind.
try {
  await main();
} catch (error) {
  console.error(error);
  process.exitCode = 1;
} finally {
  await driver.quit();
}
// Get the selector path of the tag you want to collect,
// then check that the path is correct.