# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
import json

import pymysql


class Py062018316Pipeline(object):
    """Default no-op pipeline generated by Scrapy; passes items through."""

    def process_item(self, item, spider):
        return item


class MysqlPipeline(object):
    """Base pipeline owning a MySQL connection and cursor.

    Subclasses implement process_item with their own INSERT logic and
    inherit the connection setup/teardown from here.
    """

    def __init__(self):
        # Connect to the local MySQL server. Keyword arguments replace the
        # original positional form (host, user, password, database) so the
        # call cannot silently break if the parameter order changes.
        self.conn = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password='zwl123',
            database='python01',
            charset='utf8',
        )
        self.cursor = self.conn.cursor()  # shared cursor for subclasses

    def process_item(self, item, spider):
        # Base class stores nothing; subclasses override this.
        pass

    def close_spider(self, spider=None):
        # Release DB resources when the spider finishes. Scrapy passes the
        # spider argument; default None keeps direct calls working too.
        self.cursor.close()
        self.conn.close()


class QiuShiPipeline(object):
    """Store qiushi items as JSON Lines in qiushi.json."""

    def __init__(self):
        # One handle for the whole crawl. The original re-opened the file
        # with mode 'a' on every item, leaking handles and never using this
        # 'w' handle at all.
        self.file = open('qiushi.json', 'w', encoding='utf-8')

    def process_item(self, item, spider):
        # One JSON object per line; the original json.dump calls wrote
        # objects back-to-back with no separator, producing unparseable
        # output.
        self.file.write(json.dumps(dict(item), ensure_ascii=False) + '\n')
        return item

    def close_spider(self, spider=None):
        # Original signature was close_spider(self) — Scrapy passes the
        # spider, so it raised TypeError when the spider closed.
        self.file.close()


# Tencent jobs, approach 1: plain JSON-lines file.
# NOTE: shadowed by the MysqlPipeline subclass of the same name below;
# only the later definition is visible at import time.
class TencentPipeline(object):
    """Store Tencent job items as JSON Lines in tencent.json."""

    def __init__(self):
        self.file = open('tencent.json', 'w', encoding='utf-8')

    def process_item(self, item, spider):
        # `line` replaces the original local named `str`, which shadowed
        # the builtin.
        line = json.dumps(dict(item), ensure_ascii=False) + '\n'
        self.file.write(line)
        return item

    def close_spider(self, spider=None):
        # Spider argument added: Scrapy passes it (original raised
        # TypeError here).
        self.file.close()


# Tencent jobs, approach 2: reuse the MysqlPipeline connection.
class TencentPipeline(MysqlPipeline):
    """Insert Tencent job items into the tencent_scrapy03 table."""

    def process_item(self, item, spider):
        # DEFAULT lets MySQL assign the auto-increment id; the remaining
        # columns are bound as parameters so pymysql escapes every value.
        sql = (
            "insert into tencent_scrapy03"
            "(id,name,type,num,location,date,url,duty,rq) "
            "VALUES (DEFAULT,%s,%s,%s,%s,%s,%s,%s,%s)"
        )
        data = (
            item['name'], item['type'], item['num'], item['location'],
            item['date'], item['url'], item['duty'], item['rq'],
        )
        try:
            self.cursor.execute(sql, data)
            self.conn.commit()
        except Exception as e:
            # Best-effort: log the failure and roll back so the connection
            # stays usable for the next item.
            print('插入数据失败', e)
            self.conn.rollback()
        return item
MAC中:
1、系统偏好设置->打开MySql服务
2、打开Navicat for MySql图形界面(方便查看数据)
3、打开终端MySql: mysql -u root -p (有需要输入密码)
4、show databases; (显示所有数据库)
5、use python01;(进入其中需要的数据库)
6、show tables; (显示所有python01数据库中的数据表)
7、
create table tencent_scrapy03(id int(4) not null primary key auto_increment,name varchar(300),type varchar(300),num varchar(100),location varchar(300),date varchar(100),url varchar(500),duty text,rq varchar(100))auto_increment=1 charset=utf8;(创建表语句，列名与 pipeline 中 insert 的 9 列一致)
8、
select * from XX数据表\G (命令行里查看爬取的数据；\G 本身就是语句结束符，后面不要再加分号)
9、若因为某些原因删除了所有数据,是需要重新让id从1自增的
ALTER TABLE XX数据表 auto_increment = 1;