1.robots.txt协议
ROBOTSTXT_OBEY = False
需要取消注释(激活)该设置,并将其修改为 False,使爬虫不遵守目标网站的 robots.txt 协议
DOWNLOAD_DELAY = 0.5
DOWNLOAD_DELAY 需要取消注释(激活),并设置下载延迟时间(单位:秒),以降低爬取速度
COOKIES_ENABLED = False
禁用cookie追踪
#SPIDER_MIDDLEWARES = {
#    'IvskySpider.middlewares.IvskyspiderSpiderMiddleware': 543,
#}
用于设置哪些 spider middlewares 有效
# Enable or disable downloader middlewares
# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
DOWNLOADER_MIDDLEWARES = {
    'IvskySpider.middlewares.UserAgentMiddleware': 543,
    'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': None,
    # 数字表示优先级,越小越先执行;如果填为 None,表示不执行
}
# 哪些 pipeline 可以使用
#ITEM_PIPELINES = {
#    'IvskySpider.pipelines.IvskyspiderPipeline': 300,
#}