import requests
import json
from pymysql_text import Mysql_text
# url = 'https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id=-1&count=10&category=111'
# url = 'https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id=184275&count=15&category=111'
# url = 'https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id=184086&count=15&category=111'
# url = 'https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id=183687&count=15&category=111'
#封装成一个函数
def datapq(url):
    """Fetch one page of Xueqiu's public timeline and store each post in MySQL.

    Sends a GET request to *url* with the cookie/UA headers Xueqiu requires,
    parses the JSON response, inserts each post's key fields into the
    ``text`` table, and returns the ``id`` (tid) of the last post on the
    page so the caller can use it as the next ``max_id`` for pagination.

    :param url: full timeline API URL including paging query parameters
    :return: tid of the last post processed (``None`` if the page is empty)
    """
    # Minimal headers that let the request through; the cookie was captured
    # from a live browser session and will eventually expire.
    headers = {
        'Cookie' : 'aliyungf_tc=AQAAAHV3wnlDrgcAJxJ5ah0CThlP1yWJ; xq_a_token=584d0cf8d5a5a9809761f2244d8d272bac729ed4; xq_a_token.sig=x0gT9jm6qnwd-ddLu66T3A8KiVA; xq_r_token=98f278457fc4e1e5eb0846e36a7296e642b8138a; xq_r_token.sig=2Uxv_DgYTcCjz7qx4j570JpNHIs; _ga=GA1.2.121527135.1534335428; _gid=GA1.2.1591594565.1534335428; _gat_gtag_UA_16079156_4=1; u=681534335428965; device_id=3ad9d628b7f64b88029d0cce211de4c7; Hm_lvt_1db88642e346389874251b5a1eded6e3=1534335430; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1534335430',
        'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
    }
    response = requests.get(url, headers=headers)
    page = json.loads(response.text)

    last_tid = None
    for item in page['list']:
        # Each entry's 'data' field is itself a JSON-encoded string; parse it
        # into a separate name instead of clobbering the page dict.
        detail = json.loads(item['data'])
        # Build a fresh record dict per post (the original reused one dict).
        record = {
            'tid': item['id'],
            'ids': detail['id'],
            'title': detail['title'],
            'description': detail['description'],
            'target': detail['target'],
        }
        print(record)
        try:
            # Parameterized insert via the project's MySQL wrapper.
            m = Mysql_text()
            m.sqlzz('insert into text(tid,ids,title,description,target) values (%(tid)s,%(ids)s,%(title)s,%(description)s,%(target)s)', record)
        except Exception:
            # Best-effort: skip rows that cannot be stored (e.g. duplicates
            # or encoding issues) rather than aborting the crawl. Narrowed
            # from a bare `except:` so KeyboardInterrupt etc. still propagate.
            pass
        last_tid = record['tid']
    return last_tid
# s = Mysql_text()
# a=s.sqlzz('select tid from text')
# print(a)
# Seed URL: first page of category 111 with max_id=-1 (newest posts).
url = 'https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id=-1&count=10&category=111'
# Crawl 100 pages; each call returns the last tid on the page, which
# becomes the next request's max_id so pages are walked backwards in time.
for _ in range(100):
    ss = datapq(url)
    url = 'https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id=' + str(ss) + '&count=15&category=111'
# Wraps a single function that fully automates crawling post data from Xueqiu.
# Adapted from: blog.csdn.net/yangbenhao/article/details/81712985