一、在settings.py文件中配置数据库连接参数
# --- Database connection parameters (read by the MySQL pipeline) ---
DB_HOST = '192.168.183.1'
DB_PORT = 3306
DB_USER = 'root'
DB_PASSWORD = '123123'
DB_DATABASE = 'a'
DB_CHARSET = 'utf8'

# --- Item pipelines ---
# Register the MySQL pipeline right after the default one
# (lower number = runs earlier).
ITEM_PIPELINES = {
    'dushu.pipelines.DushuPipeline': 300,
    'dushu.pipelines.DushuMysql': 301,
}
二、在pipelines.py文件定义管道
读取settings文件中的参数:
# Read the parameters defined in the project's settings.py via Scrapy's helper;
# the returned Settings object supports dict-style access (settings['DB_HOST']).
from scrapy.utils.project import get_project_settings
settings = get_project_settings()
# Pull the project settings into this module so the pipeline can read DB config.
from scrapy.utils.project import get_project_settings

import pymysql


class DushuMysql(object):
    """Scrapy item pipeline that persists scraped items into a MySQL table.

    Connection parameters come from settings.py:
    DB_HOST, DB_PORT, DB_USER, DB_PASSWORD, DB_DATABASE, DB_CHARSET.
    """

    def __init__(self):
        # The Settings object exposes every attribute defined in settings.py.
        settings = get_project_settings()
        self.host = settings['DB_HOST']
        self.port = settings['DB_PORT']
        self.user = settings['DB_USER']
        self.password = settings['DB_PASSWORD']
        self.database = settings['DB_DATABASE']
        self.charset = settings['DB_CHARSET']
        self.connect()

    def connect(self):
        """Open the MySQL connection and create a cursor."""
        self.conn = pymysql.connect(
            host=self.host,
            port=self.port,
            user=self.user,
            password=self.password,
            db=self.database,
            charset=self.charset,
        )
        self.cursor = self.conn.cursor()

    def process_item(self, item, spider):
        """Insert one item into the `books` table and return it unchanged.

        Uses a parameterized query instead of the original %-string
        interpolation, which broke on quotes inside the scraped data and
        was vulnerable to SQL injection. pymysql escapes the values itself
        when they are passed as the second argument to execute().
        """
        try:
            self.cursor.execute(
                'insert into books values(%s,%s,%s)',
                (item['src'], item['alt'], item['author']),
            )
            # commit is required — without it the rows are lost on close
            self.conn.commit()
        except Exception as e:
            # best-effort: report the failure but keep the crawl running
            print(str(e))
        return item

    def close_spider(self, spider):
        """Release the cursor and the connection when the spider finishes."""
        self.cursor.close()
        self.conn.close()