爬虫--雪球网爬取(requests 和 urllib.request 的两种方法)

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/MR_HJY/article/details/81713093
import json
from urllib import request

# Plain requests were refused by the site, so browser-like headers are sent.
# Only the cookie and the User-Agent are active; the other browser headers
# (Accept, Host, Referer, ...) were left disabled in the original script.
headers = {
    # Presumably copied from a browser session -- the tokens will expire.
    # One cookie pair per line; adjacent literals are joined at compile time.
    'Cookie': (
        'aliyungf_tc=AQAAALoQF3p02gsAUhVFebQ3uBBNZn+H; '
        'xq_a_token=584d0cf8d5a5a9809761f2244d8d272bac729ed4; '
        'xq_a_token.sig=x0gT9jm6qnwd-ddLu66T3A8KiVA; '
        'xq_r_token=98f278457fc4e1e5eb0846e36a7296e642b8138a; '
        'xq_r_token.sig=2Uxv_DgYTcCjz7qx4j570JpNHIs; '
        '_ga=GA1.2.516718356.1534295265; '
        '_gid=GA1.2.1050085592.1534295265; '
        'u=301534295266356; '
        'device_id=f5c21e143ce8060c74a2de7cbcddf0b8; '
        'Hm_lvt_1db88642e346389874251b5a1eded6e3=1534295265,1534295722; '
        'Hm_lpvt_1db88642e346389874251b5a1eded6e3=1534295722'
    ),
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/67.0.3396.99 Safari/537.36'
    ),
}

# Fetch the public timeline with the standard-library urllib client.
url = 'https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id=-1&count=10&category=111'

# Build the request object so the custom headers are sent along with it.
req = request.Request(url, headers=headers)

# Open the URL inside a context manager so the connection is always closed,
# and bound the wait with a timeout so the script cannot hang forever.
with request.urlopen(req, timeout=10) as response:
    res = response.read()  # raw bytes

# Decode the bytes and parse the JSON payload (a dict at the top level).
res_dict = json.loads(res.decode('utf-8'))

# The entries to display live under the 'list' key.
list_list = res_dict['list']
print()
print('list_list==', list_list)

# Each entry is a dict; print its 'data' field followed by a separator line.
for list_item_dict in list_list:
    data_str = list_item_dict['data']
    print(data_str)
    print('-' * 50)
import json
import requests

# Plain requests were refused by the site, so browser-like headers are sent.
# Only the cookie and the User-Agent are active; the other browser headers
# (Accept, Host, Referer, ...) were left disabled in the original script.
headers = {
    # Presumably copied from a browser session -- the tokens will expire.
    # One cookie pair per line; adjacent literals are joined at compile time.
    'Cookie': (
        'aliyungf_tc=AQAAALoQF3p02gsAUhVFebQ3uBBNZn+H; '
        'xq_a_token=584d0cf8d5a5a9809761f2244d8d272bac729ed4; '
        'xq_a_token.sig=x0gT9jm6qnwd-ddLu66T3A8KiVA; '
        'xq_r_token=98f278457fc4e1e5eb0846e36a7296e642b8138a; '
        'xq_r_token.sig=2Uxv_DgYTcCjz7qx4j570JpNHIs; '
        '_ga=GA1.2.516718356.1534295265; '
        '_gid=GA1.2.1050085592.1534295265; '
        'u=301534295266356; '
        'device_id=f5c21e143ce8060c74a2de7cbcddf0b8; '
        'Hm_lvt_1db88642e346389874251b5a1eded6e3=1534295265,1534295722; '
        'Hm_lpvt_1db88642e346389874251b5a1eded6e3=1534295722'
    ),
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/67.0.3396.99 Safari/537.36'
    ),
}

# Fetch the same public timeline, this time with the requests library.
url = 'https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id=-1&count=10&category=111'

# requests never times out by default; bound the wait so the script cannot
# hang forever on an unresponsive server.
response = requests.get(url, headers=headers, timeout=10)

# Fail loudly on an HTTP error status instead of trying to parse an error
# page as the expected JSON payload.
response.raise_for_status()

# Let requests decode the body and parse it as JSON (a dict at the top level).
res_dict = response.json()

# The entries to display live under the 'list' key.
list_list = res_dict['list']

# Each entry is a dict; print its 'data' field followed by a separator line.
for list_item_dict in list_list:
    data_str = list_item_dict['data']
    print(data_str)
    print('-' * 50)

猜你喜欢

转载自blog.csdn.net/MR_HJY/article/details/81713093