node抓取王者荣耀英雄资料库

用 Node.js 写的爬虫，用来抓取王者荣耀英雄资料库。仅供学习使用，侵删。

主要包括(基本上相关的都抓取):

  • 召唤师技能
  • 铭文
  • 装备
  • 英雄
  • 皮肤
  • 英雄故事
  • 英雄技能
  • 推荐铭文
  • 推荐装备
  • 技能加点

目前还没有搞懂如何抓取动态加载的内容。我觉得在抓取之前，需要先弄清楚网页的 JS 逻辑，emmmm……

这里我已经把所需要的 JSON 文件下载到项目里面了。下载方法请参考另一篇文章《node下载文件》。

数据库的配置:

// MySQL connection settings; this object is what the scraper passes to
// mysql.createConnection() after loading it via require('./mysql.config').
const mysqlSettings = {
    host: 'localhost',
    database: 'glory_of_kings',
    user: 'root',
    password: 'root',
};

module.exports = mysqlSettings;

也可以直接把配置写在代码里，不用这么麻烦；这里单独放到配置文件中，只是为了学习新的方式。

// mysql的包 很多安利easymysql的,暂时还没改
let mysql = require('mysql');
// 文件操作
let fs = require('fs');
// mysql配置
let mysqlConfig = require('./mysql.config');

// 爬虫所需要的库
let http = require('http');
let cheerio = require('cheerio');
let iconv = require('iconv-lite');


// Create the single MySQL connection shared by every update function in this
// module. It has to be closed once all database work has finished.
const connection = mysql.createConnection(mysqlConfig);

// Connect right away; a connection failure is logged and then rethrown.
connection.connect((connectError) => {
    if (!connectError) {
        return;
    }
    console.log('数据库连接失败');
    throw connectError;
});

// Directory holding the pre-downloaded JSON data files.
const BASE_PATH = '../assets/jsons/';
// Encoding used when reading those files.
const FILE_TYPE = 'utf-8';


// Common prefix shared by every image URL below.
const IMG_ROOT = 'http://game.gtimg.cn/images/yxzj/img201606/';

// Base URL of the per-hero detail pages.
const HERO_DETAIL_PATH = 'http://pvp.qq.com/web201605/herodetail/';
// Base URL of hero images.
const HERO_IMG_PATH = `${IMG_ROOT}heroimg/`;
// Base URL of the large hero skin images.
const HERO_BIGSKIN_PATH = `${IMG_ROOT}skin/hero-info/`;
// Base URL of equipment images.
const EQUIP_IMG_PATH = `${IMG_ROOT}itemimg/`;
// Base URL of summoner spell images.
const SUMMONER_PATH = `${IMG_ROOT}summoner/`;
// Base URL of inscription (rune) images.
const INSCRIPTION_PATH = `${IMG_ROOT}mingwen/`;


// Insert statement for summoner spells.
// `rank` is a reserved word from MySQL 8.0.2 onwards, so it is backtick-quoted;
// quoting is also accepted by older servers.
const SUMMONER_SQL = 'insert into summoner(summoner_id,name,`rank`,cd,description,img_url,big_img_url) values(?,?,?,?,?,?,?)';

// Insert statement for inscriptions (runes).
const INSCRIPTION_INSERT = 'insert into inscription(inscription_id,type,grade,name,description,img_url) values(?,?,?,?,?,?)';

// Insert statement for heroes.
const HERO_INSERT = 'insert into hero(hero_id,name,pay_type,new_type,hero_type,hero_type2,skin_name,img_url,live,attack,skill,difficulty) values (?,?,?,?,?,?,?,?,?,?,?,?)';

// Insert statement for hero skins.
const HERO_SKIN_INSERT = 'insert into skin(skin_id,hero_id,skin_name,small_img_url,big_img_url) values (?,?,?,?,?)';

// Insert statement for equipment.
const EQUIP_INSERT = 'insert into equip(equip_id,name,type,sale_price,total_price,des1,des2,img_url) values (?,?,?,?,?,?,?,?)';

// Insert statement for hero stories.
const STORY_INSERT = 'insert into story(hero_id,story) values (?,?)';

// Insert statement for hero skills.
const SKILL_INSERT = 'insert into skill(skill_id,hero_id,name,cool,waste,description,tips,img_url) values (?,?,?,?,?,?,?,?)';

// Insert statement for recommended inscriptions.
const HERO_INSCRIPTION_INSERT = 'insert into hero_inscription(hero_id,inscription_ids,tips) values (?,?,?)';

// Insert statement for recommended equipment.
const HERO_EQUIP_INSERT = 'insert into hero_equip(hero_id,equip_ids1,tips1,equip_ids2,tips2) values (?,?,?,?,?)';

// Insert statement for hero relationships.
// Each item on the page lists two heroes, so one statement inserts two rows.
const LINKS_INSERT = 'insert into links(hero_id,hero_id1,type,tips) values (?,?,?,?),(?,?,?,?)';

// Insert statement for the skill-levelling / summoner-spell recommendation.
const SKILL_SUMMONER_INSERT = 'insert into skill_summoner(ename,skill_id1,skill_id2,summoner_id1,summoner_id2) values (?,?,?,?,?)';

/**
 * Import summoner spells from summoner.json into the `summoner` table.
 *
 * Reads `${BASE_PATH}summoner.json`, builds one parameter list per entry and
 * runs SUMMONER_SQL for each. Progress is logged to the console; a failed
 * insert is logged and rethrown from the query callback.
 */
function updateSummoner() {
    fs.readFile(`${BASE_PATH}summoner.json`, FILE_TYPE, (err, data) => {
        if (err) {
            console.log(`文件读取失败`);
            return;
        }

        // Parse the file once and reuse the result.
        const summoners = JSON.parse(data);
        if (summoners.length === 0) {
            console.log(`暂无数据`);
            return;
        }

        summoners.forEach((it) => {
            const params = [
                it.summoner_id,
                it.summoner_name,
                // rank column: the number found after the first three characters.
                parseInt(it.summoner_rank.substring(3), 10),
                // cd column: the leading integer of the description text.
                parseInt(it.summoner_description, 10),
                // description column: the segment after the first ':' separator.
                it.summoner_description.split(':')[1],
                `${SUMMONER_PATH}${it.summoner_id}.jpg`,
                `${SUMMONER_PATH}${it.summoner_id}-big.jpg`,
            ];
            connection.query(SUMMONER_SQL, params, (error) => {
                if (error) {
                    console.log(error);
                    throw error;
                }
                console.log(`技能${it.summoner_name}插入成功!`);
            });
        });
    });
}

/**
 * Import inscriptions (runes) from ming.json into the `inscription` table.
 *
 * Every entry of the JSON array becomes one INSCRIPTION_INSERT execution.
 * Progress is logged to the console; a failed insert is logged and rethrown
 * from the query callback.
 */
function updateInscription() {
    const sourceFile = `${BASE_PATH}ming.json`;

    fs.readFile(sourceFile, FILE_TYPE, (readError, text) => {
        if (readError) {
            console.log(`文件读取失败`);
            return;
        }

        const inscriptions = JSON.parse(text);
        if (inscriptions.length === 0) {
            console.log(`暂无数据`);
            return;
        }

        inscriptions.forEach((entry) => {
            // The image URL is derived from the inscription id.
            const imageUrl = `${INSCRIPTION_PATH}${entry.ming_id}.png`;
            const row = [
                entry.ming_id,
                entry.ming_type,
                entry.ming_grade,
                entry.ming_name,
                entry.ming_des,
                imageUrl,
            ];

            connection.query(INSCRIPTION_INSERT, row, (insertError) => {
                if (insertError) {
                    console.log(insertError);
                    throw insertError;
                }
                console.log(`铭文${entry.ming_name}插入成功!`);
            });
        });
    });
}

/**
 * Import equipment from item.json into the `equip` table.
 *
 * Every entry of the JSON array becomes one EQUIP_INSERT execution. Progress
 * is logged to the console; a failed insert is logged and rethrown from the
 * query callback.
 */
function updateEquipments() {
    const sourceFile = `${BASE_PATH}item.json`;

    fs.readFile(sourceFile, FILE_TYPE, (readError, text) => {
        if (readError) {
            console.log(`文件读取失败`);
            return;
        }

        const items = JSON.parse(text);
        if (items.length === 0) {
            console.log(`暂无装备数据`);
            return;
        }

        items.forEach((entry) => {
            // The image URL is derived from the item id.
            const imageUrl = `${EQUIP_IMG_PATH}${entry.item_id}.jpg`;
            const row = [
                entry.item_id,
                entry.item_name,
                entry.item_type,
                entry.price,
                entry.total_price,
                entry.des1,
                entry.des2,
                imageUrl,
            ];

            connection.query(EQUIP_INSERT, row, (insertError) => {
                if (insertError) {
                    console.log(insertError);
                    throw insertError;
                }
                console.log(`装备:${entry.item_name} 插入成功!`);
            });
        });
    });
}

/**
 * Import heroes into the `hero` table.
 *
 * For every entry in heros.json the hero's detail page is downloaded, decoded
 * from GBK, and the four `.ibar` elements are read to fill the live / attack /
 * skill / difficulty columns (in that order, per HERO_INSERT). Progress is
 * logged to the console; a failed insert is logged and rethrown from the
 * query callback.
 *
 * A page that does not contain four `.ibar` elements is logged and skipped
 * instead of aborting the whole import with a TypeError.
 */
function updateHero() {
    fs.readFile(`${BASE_PATH}heros.json`, FILE_TYPE, (err, data) => {
        if (err) {
            console.log(`文件读取失败`);
            return;
        }

        // Parse the file once and reuse the result.
        const heroes = JSON.parse(data);
        if (heroes.length === 0) {
            console.log(`暂无数据`);
            return;
        }

        heroes.forEach((it) => {
            http.get(`${HERO_DETAIL_PATH}${it.ename}.shtml`, (res) => {
                const chunks = [];
                res.on('data', (chunk) => {
                    chunks.push(chunk);
                });
                res.on('end', () => {
                    // Decode the page as GBK to avoid garbled text.
                    const html = iconv.decode(Buffer.concat(chunks), 'gbk');
                    const $ = cheerio.load(html, {decodeEntities: false});
                    const bars = $('.ibar');
                    if (bars.length < 4) {
                        console.log(`英雄${it.ename}的页面缺少属性数据,已跳过`);
                        return;
                    }

                    // Each value is the integer found after the first six
                    // characters of the element's inline style.
                    const barValue = (position) =>
                        parseInt(bars[position].attribs.style.substring(6), 10);

                    const params = [
                        it.ename,
                        it.cname,
                        it.pay_type,
                        it.new_type,
                        it.hero_type,
                        it.hero_type2,
                        it.skin_name,
                        `${HERO_IMG_PATH}${it.ename}/${it.ename}.jpg`,
                        barValue(0),
                        barValue(1),
                        barValue(2),
                        barValue(3),
                    ];
                    connection.query(HERO_INSERT, params, (error) => {
                        if (error) {
                            console.log(error);
                            throw error;
                        }
                        console.log(`英雄${it.ename}插入成功!`);
                    });
                });
            }).on('error', () => {
                console.log(`获取页面数据出错`);
            });
        });
    });
}

/**
 * Import hero skins into the `skin` table.
 *
 * Each hero's `skin_name` field in heros.json is a '|'-separated list; one row
 * is inserted per name. The skin id is the hero id followed by the 1-based
 * position of the skin, and both image URLs are built from the same position.
 * A failed insert is logged and rethrown from the query callback.
 *
 * The success message is printed from the query callback, so it is only
 * logged once the insert has actually succeeded.
 */
function updateSkin() {
    fs.readFile(`${BASE_PATH}heros.json`, FILE_TYPE, (err, data) => {
        if (err) {
            console.log(`文件读取失败`);
            return;
        }

        // Parse the file once and reuse the result.
        const heroes = JSON.parse(data);
        if (heroes.length === 0) {
            console.log(`暂无数据`);
            return;
        }

        heroes.forEach((it) => {
            it.skin_name.split('|').forEach((skinName, ind) => {
                const position = ind + 1;
                const params = [
                    it.ename + '' + position,
                    it.ename,
                    skinName,
                    `${HERO_IMG_PATH}${it.ename}/${it.ename}-smallskin-${position}.jpg`,
                    `${HERO_BIGSKIN_PATH}${it.ename}/${it.ename}-bigskin-${position}.jpg`,
                ];
                connection.query(HERO_SKIN_INSERT, params, (error) => {
                    if (error) {
                        console.log(error);
                        throw error;
                    }
                    console.log(`皮肤${skinName}插入成功!`);
                });
            });
        });
    });
}

/**
 * Import hero background stories into the `story` table.
 *
 * For every entry in heros.json the hero's detail page is downloaded, decoded
 * from GBK, and the inner HTML of the first `.pop-story .pop-bd p` match is
 * stored together with the hero id. Progress is logged to the console; a
 * failed insert is logged and rethrown from the query callback.
 */
function updateStory() {
    fs.readFile(`${BASE_PATH}heros.json`, FILE_TYPE, (readError, text) => {
        if (readError) {
            console.log(`文件读取失败`);
            return;
        }

        const heroes = JSON.parse(text);
        if (heroes.length === 0) {
            console.log(`暂无英雄故事`);
            return;
        }

        heroes.forEach((hero) => {
            const pageUrl = `${HERO_DETAIL_PATH}${hero.ename}.shtml`;

            const request = http.get(pageUrl, (response) => {
                const buffers = [];
                response.on('data', (piece) => {
                    buffers.push(piece);
                });
                response.on('end', () => {
                    // Decode the page as GBK to avoid garbled text.
                    const page = iconv.decode(Buffer.concat(buffers), 'gbk');
                    const $ = cheerio.load(page, {decodeEntities: false});
                    const storyHtml = $('.pop-story .pop-bd p').html();

                    connection.query(STORY_INSERT, [hero.ename, storyHtml], (insertError) => {
                        if (insertError) {
                            console.log(insertError);
                            throw insertError;
                        }
                        console.log(`${hero.ename}的故事插入成功`);
                    });
                });
            });

            request.on('error', () => {
                console.log(`获取页面数据出错`);
            });
        });
    });
}

/**
 * Import hero skills into the `skill` table.
 *
 * For every entry in heros.json the hero's detail page is downloaded, decoded
 * from GBK, and each `.skill-show .show-list` element that has a skill name
 * produces one SKILL_INSERT execution. The skill id is the hero id followed by
 * the element's index on the page. A failed insert is logged and rethrown
 * from the query callback.
 *
 * The success message is printed from the query callback, so it is only
 * logged once the insert has actually succeeded.
 */
function updateSkill() {
    fs.readFile(`${BASE_PATH}heros.json`, FILE_TYPE, (err, data) => {
        if (err) {
            console.log(`文件读取失败`);
            return;
        }

        // Parse the file once and reuse the result.
        const heroes = JSON.parse(data);
        if (heroes.length === 0) {
            console.log(`暂无数据`);
            return;
        }

        heroes.forEach((it) => {
            http.get(`${HERO_DETAIL_PATH}${it.ename}.shtml`, (res) => {
                const chunks = [];
                res.on('data', (chunk) => {
                    chunks.push(chunk);
                });
                res.on('end', () => {
                    // Decode the page as GBK to avoid garbled text.
                    const html = iconv.decode(Buffer.concat(chunks), 'gbk');
                    const $ = cheerio.load(html, {decodeEntities: false});

                    $('.skill-show .show-list').each((ind, el) => {
                        const name = $(el).find('.skill-name b').html();
                        // Entries without a name are skipped.
                        if (!name) {
                            return;
                        }

                        const spans = $(el).find('.skill-name span');
                        const params = [
                            it.ename + '' + ind,
                            it.ename,
                            name,
                            // cool column: integer after the first four characters of span 0.
                            parseInt($(spans[0]).html().substring(4), 10),
                            // waste column: integer after the first three characters of span 1.
                            parseInt($(spans[1]).html().substring(3), 10),
                            $(el).find('.skill-desc').html(),
                            $(el).find('.skill-tips').html(),
                            `${HERO_IMG_PATH}${it.ename}/${it.ename}${ind}0.png`,
                        ];
                        connection.query(SKILL_INSERT, params, (error) => {
                            if (error) {
                                console.log(error);
                                throw error;
                            }
                            console.log(`技能${name}插入成功!`);
                        });
                    });
                });
            }).on('error', () => {
                console.log(`获取页面数据出错`);
            });
        });
    });
}

/**
 * Import the recommended inscriptions of every hero into `hero_inscription`.
 *
 * For every entry in heros.json the hero's detail page is downloaded and
 * decoded from GBK. The `data-ming` attribute of `.sugg-info ul` ('|'-separated)
 * is stored as a comma-separated list, together with the text of `.sugg-tips`
 * minus its first five characters. A failed insert is logged and rethrown
 * from the query callback.
 *
 * A page without the `data-ming` attribute is logged and skipped instead of
 * aborting the whole import with a TypeError.
 */
function updateHeroInscription() {
    fs.readFile(`${BASE_PATH}heros.json`, FILE_TYPE, (err, data) => {
        if (err) {
            console.log("文件读取失败");
            return;
        }

        // Parse the file once and reuse the result.
        const heroes = JSON.parse(data);
        if (heroes.length === 0) {
            console.log('暂无数据');
            return;
        }

        heroes.forEach((it, index) => {
            http.get(`${HERO_DETAIL_PATH}${it.ename}.shtml`, (res) => {
                const chunks = [];
                res.on('data', (chunk) => {
                    chunks.push(chunk);
                });
                res.on('end', () => {
                    // Decode the page as GBK to avoid garbled text.
                    const html = iconv.decode(Buffer.concat(chunks), 'gbk');
                    const $ = cheerio.load(html, {decodeEntities: false});

                    const inscriptionIds = $('.sugg-info ul').attr('data-ming');
                    if (typeof inscriptionIds !== 'string') {
                        console.log(`英雄${it.ename}的页面缺少推荐铭文,已跳过`);
                        return;
                    }

                    const params = [
                        it.ename,
                        inscriptionIds.split('|').join(','),
                        // Drop the first five characters of the tips text.
                        $('.sugg-tips').text().substring(5),
                    ];
                    connection.query(HERO_INSCRIPTION_INSERT, params, (queryError) => {
                        if (queryError) {
                            console.log(queryError);
                            throw queryError;
                        }
                        console.log(`第${index}条数据插入成功!`);
                    });
                });
            }).on('error', () => {
                console.log("获取页面数据出错");
            });
        });
    });
}

/**
 * Import the two recommended equipment sets of every hero into `hero_equip`.
 *
 * For every entry in heros.json the hero's detail page is downloaded and
 * decoded from GBK. The `data-item` attributes of the first two
 * `.equip-info ul` elements ('|'-separated) are stored as comma-separated
 * lists, each with the text of the matching `.equip-tips` element minus its
 * first five characters. A failed insert is logged and rethrown from the
 * query callback.
 *
 * A page missing either `data-item` attribute is logged and skipped instead
 * of aborting the whole import with a TypeError.
 */
function updateHeroEquip() {
    fs.readFile(`${BASE_PATH}heros.json`, FILE_TYPE, (err, data) => {
        if (err) {
            console.log("文件读取失败");
            return;
        }

        // Parse the file once and reuse the result.
        const heroes = JSON.parse(data);
        if (heroes.length === 0) {
            console.log('暂无数据');
            return;
        }

        heroes.forEach((it, index) => {
            http.get(`${HERO_DETAIL_PATH}${it.ename}.shtml`, (res) => {
                const chunks = [];
                res.on('data', (chunk) => {
                    chunks.push(chunk);
                });
                res.on('end', () => {
                    // Decode the page as GBK to avoid garbled text.
                    const html = iconv.decode(Buffer.concat(chunks), 'gbk');
                    const $ = cheerio.load(html, {decodeEntities: false});

                    const lists = $('.equip-info ul');
                    const tips = $('.equip-tips');
                    const firstSet = $(lists[0]).attr('data-item');
                    const secondSet = $(lists[1]).attr('data-item');
                    if (typeof firstSet !== 'string' || typeof secondSet !== 'string') {
                        console.log(`英雄${it.ename}的页面缺少推荐装备,已跳过`);
                        return;
                    }

                    const params = [
                        it.ename,
                        firstSet.split('|').join(','),
                        // Drop the first five characters of each tips text.
                        $(tips[0]).text().substring(5),
                        secondSet.split('|').join(','),
                        $(tips[1]).text().substring(5),
                    ];
                    connection.query(HERO_EQUIP_INSERT, params, (queryError) => {
                        if (queryError) {
                            console.log(queryError);
                            throw queryError;
                        }
                        console.log(`第${index}条数据插入成功!`);
                    });
                });
            }).on('error', () => {
                console.log("获取页面数据出错");
            });
        });
    });
}

/**
 * Import hero relationships into the `links` table.
 *
 * For every entry in heros.json the hero's detail page is downloaded and
 * decoded from GBK. Each `.hero-info` element yields two rows in a single
 * LINKS_INSERT execution: the related hero id is the first three characters
 * of the first/second link's href, `type` is the element's index on the page,
 * and `tips` is the inner HTML of the first/second paragraph. A failed insert
 * is logged and rethrown from the query callback.
 *
 * The per-hero success message is printed only after every insert for that
 * hero has completed; nothing is printed for a page without any `.hero-info`
 * element, because nothing was inserted.
 */
function updateLinks() {
    fs.readFile(`${BASE_PATH}heros.json`, FILE_TYPE, (err, data) => {
        if (err) {
            console.log("文件读取失败");
            return;
        }

        // Parse the file once and reuse the result.
        const heroes = JSON.parse(data);
        if (heroes.length === 0) {
            console.log('暂无数据');
            return;
        }

        heroes.forEach((it, index) => {
            http.get(`${HERO_DETAIL_PATH}${it.ename}.shtml`, (res) => {
                const chunks = [];
                res.on('data', (chunk) => {
                    chunks.push(chunk);
                });
                res.on('end', () => {
                    // Decode the page as GBK to avoid garbled text.
                    const html = iconv.decode(Buffer.concat(chunks), 'gbk');
                    const $ = cheerio.load(html, {decodeEntities: false});

                    // Build every parameter list first so the number of
                    // pending inserts is known before any query is issued.
                    const rows = [];
                    $('.hero-info').each((ind, el) => {
                        const anchors = $(el).find('ul a');
                        const notes = $(el).find('p');
                        rows.push([
                            it.ename,
                            $(anchors[0]).attr('href').substring(0, 3),
                            ind,
                            $(notes[0]).html(),
                            it.ename,
                            $(anchors[1]).attr('href').substring(0, 3),
                            ind,
                            $(notes[1]).html(),
                        ]);
                    });

                    let pending = rows.length;
                    rows.forEach((params) => {
                        connection.query(LINKS_INSERT, params, (queryError) => {
                            if (queryError) {
                                console.log(queryError);
                                throw queryError;
                            }
                            pending -= 1;
                            if (pending === 0) {
                                console.log(`第${index}条数据插入成功!`);
                            }
                        });
                    });
                });
            }).on('error', () => {
                console.log("获取页面数据出错");
            });
        });
    });
}

/**
 * Import the skill-levelling and summoner-spell recommendation of every hero
 * into the `skill_summoner` table.
 *
 * For every entry in heros.json the hero's detail page is downloaded and
 * decoded from GBK. The two skill ids are cut from fixed character offsets
 * (50 to 54) of the first two `.sugg-skill img` sources, and the two summoner
 * ids are cut from the `data-skill` attribute of the sixth `.sugg-info2 p`
 * element. Progress is logged to the console; a failed insert is logged and
 * rethrown from the query callback.
 *
 * NOTE(review): the fixed offsets presumably match the current image URL and
 * attribute layout of the page; verify against the live markup.
 */
function updateSkillSummoner() {
    fs.readFile(`${BASE_PATH}heros.json`, FILE_TYPE, (readError, text) => {
        if (readError) {
            console.log("文件读取失败");
            return;
        }

        const heroes = JSON.parse(text);
        if (heroes.length === 0) {
            console.log('暂无数据');
            return;
        }

        heroes.forEach((hero, position) => {
            const pageUrl = `${HERO_DETAIL_PATH}${hero.ename}.shtml`;

            const request = http.get(pageUrl, (response) => {
                const buffers = [];
                response.on('data', (piece) => {
                    buffers.push(piece);
                });
                response.on('end', () => {
                    // Decode the page as GBK to avoid garbled text.
                    const page = iconv.decode(Buffer.concat(buffers), 'gbk');
                    const $ = cheerio.load(page, {decodeEntities: false});

                    const skillIcons = $('.sugg-skill img');
                    const summonerCodes = $($('.sugg-info2 p')[5]).attr('data-skill');
                    const row = [
                        hero.ename,
                        $(skillIcons[0]).attr('src').substring(50, 54),
                        $(skillIcons[1]).attr('src').substring(50, 54),
                        summonerCodes.substring(0, 5),
                        summonerCodes.substring(6),
                    ];

                    connection.query(SKILL_SUMMONER_INSERT, row, (insertError) => {
                        if (insertError) {
                            console.log(insertError);
                            throw insertError;
                        }
                        console.log(`第${position}条数据插入成功!`);
                    });
                });
            });

            request.on('error', () => {
                console.log("获取页面数据出错");
            });
        });
    });
}

/**
 * Close the shared MySQL connection.
 *
 * The connection must not be ended while the module is loading: the exported
 * update functions issue their queries later, and the mysql client rejects
 * queries enqueued after end() has been called. Callers should invoke this
 * once all the updates they started have finished. A failure to close is
 * logged and rethrown from the callback.
 */
function closeConnection() {
    connection.end(function (e) {
        if (e) {
            console.log(`关闭数据库失败`);
            throw e;
        }
    });
}

exports.updateSummoner = updateSummoner;
exports.updateInscription = updateInscription;
exports.updateHero = updateHero;
exports.updateSkin = updateSkin;
exports.updateEquipments = updateEquipments;
exports.updateStory = updateStory;
exports.updateSkill = updateSkill;
exports.updateHeroInscription = updateHeroInscription;
exports.updateHeroEquip = updateHeroEquip;
exports.updateLinks = updateLinks;
exports.updateSkillSummoner = updateSkillSummoner;
exports.closeConnection = closeConnection;

所有英雄的 herolist.json 文件有点问题：解析成 JSON 对象时总是报错。后来我手动把它改成了 heros.json，两个文件的内容是一样的，但 heros.json 可以解析，herolist.json 不可以，很尴尬。如果你解决了这个问题，可以告诉我一下，谢谢。

周免英雄这里也有点意思：它是动态插入的，不方便直接获取。费了老大劲才发现，在抓取到的所有英雄的 JSON 文件中，pay_type=10 的英雄就是周免英雄，pay_type=11 的英雄是新手推荐，这些都在 header.js 里面处理了，emmmm……（传送：周免英雄，代码在 339-347 行）

// Collect into freeHeroData every entry of `data` whose pay_type contains 10.
// `data` is defined outside this excerpt; per the surrounding text it is the
// hero list and pay_type 10 marks the weekly free heroes.
var freeHeroData = [],
    freeHeroHtml = "";
for (var i = 0; i < data.length; i++) {
    // pay_type is converted to a string and split on ',' into its parts.
    var payarr = [],
        payarr = ('' + data[i].pay_type).split(',');
    // If pay_type is 10, add the entry to freeHeroData. The first comparison
    // relies on loose equality coercing the array to a string; the other two
    // check the first and second parts individually.
    if (payarr == 10 || payarr[0] == 10 || payarr[1] == 10) {
        freeHeroData.push(data[i]);
    }
}
// console.log(freeHeroData);

OK!

猜你喜欢

转载自blog.csdn.net/u014264373/article/details/81076577