Scrapy: Saving Scraped Data to MongoDB

1. Define a custom pipeline in pipelines.py


import pymongo

class MongoPipeline(object):
    def __init__(self, client, db):
        # Open the connection once; MongoClient accepts a host name or a full URI.
        self.client = pymongo.MongoClient(client)
        self.db = self.client[db]

    # from_crawler() reads the relevant values from settings.py, so the
    # resulting configuration is available on the pipeline instance.
    @classmethod
    def from_crawler(cls, crawler):
        # Instantiate this class, passing in the two settings (with fallbacks).
        obj = cls(
            client=crawler.settings.get('MONGOCLIENT', 'localhost'),
            db=crawler.settings.get('DB', 'test')
        )
        return obj

    def process_item(self, item, spider):
        # Upsert keyed on 'url', so re-crawled pages update the existing
        # document instead of inserting duplicates.
        self.db['novel'].update_one({'url': item['url']}, {'$set': dict(item)}, upsert=True)
        return item

    def close_spider(self, spider):
        # Release the connection when the spider finishes.
        self.client.close()
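Note that process_item() assumes every item carries a url field, since that is the upsert key. A minimal sketch of what the matching items.py could look like (NovelItem and the fields other than url are illustrative assumptions, not from the original post):

import scrapy

class NovelItem(scrapy.Item):
    # 'url' is required: MongoPipeline uses it as the upsert key.
    url = scrapy.Field()
    # Hypothetical example fields; replace with whatever your spider extracts.
    title = scrapy.Field()
    content = scrapy.Field()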

2. Enable the pipeline in settings.py

ITEM_PIPELINES = {
    'NovelSpider.pipelines.MongoPipeline': 301,
}
# Host (or full URI) and database name read by MongoPipeline.from_crawler().
MONGOCLIENT = 'localhost'
DB = 'novel'
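After a crawl has run, you can confirm the upserts from a standalone script. A quick sketch using pymongo directly, assuming the settings above (the 'novel' collection name is the one hardcoded in process_item()):

import pymongo

client = pymongo.MongoClient('localhost')  # MONGOCLIENT from settings.py
db = client['novel']                       # DB from settings.py
# Count the stored documents and print one, to verify the pipeline wrote data.
print(db['novel'].count_documents({}))
print(db['novel'].find_one())
client.close()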



Reposted from blog.csdn.net/qq_38661599/article/details/80945980