# Simulated login (模拟登录): log in by POSTing phone number + password
# -*- coding: utf-8 -*-
import scrapy
from scrapy import cmdline
#from scrapy.spiders import CrawlSpider
import scrapy
from scrapy.contrib.spiders.crawl import CrawlSpider
from astropy.io.fits.header import Header
class ZhihuUserSpider(CrawlSpider):
    """Log in to Zhihu with a phone number + password, then fetch a profile page.

    The original hard-coded an ``_xsrf`` anti-CSRF token; such tokens expire,
    so the login POST would always fail once the token went stale.  This
    version fetches the login page first and extracts the current token.
    """
    name = "zhihu_user"
    allowed_domains = ['zhihu.com']
    start_urls = ["http://www.zhihu.com"]
    # Browser-like User-Agent so zhihu.com does not reject the requests.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36'
    }

    def start_requests(self):
        """Fetch the login page first so a fresh ``_xsrf`` token can be read."""
        return [scrapy.Request(
            "http://www.zhihu.com",
            headers=self.headers,
            callback=self._post_login,
        )]

    def _post_login(self, response):
        # Pull the anti-CSRF token out of the login page instead of shipping
        # a hard-coded value that expires.
        xsrf = response.css('input[name="_xsrf"]::attr(value)').extract_first(default='')
        return scrapy.FormRequest(
            "http://www.zhihu.com/login/phone_num",
            formdata={
                '_xsrf': xsrf,
                'remember_me': 'true',
                'password': '******',   # fill in real credentials before running
                'phone_num': '******',
            },
            headers=self.headers,
            callback=self.after_login,
        )

    def after_login(self, response):
        """Runs after the login POST; request the target profile page."""
        # print() form is valid on both Python 2 and Python 3 (the original
        # used Python-2-only print statements).
        print('after login')
        yield scrapy.Request(
            'https://www.zhihu.com/people/shuangyueliao-82/activities',
            callback=self.onetwo,
            headers=self.headers,
        )

    def onetwo(self, response):
        # Dump the raw profile-page body for inspection.
        print(response.body)
if __name__ == '__main__':
    # Run this spider directly for debugging.  Use the `cmdline` module that
    # is imported at the top of the file, rather than reaching through the
    # `scrapy` package attribute.
    cmdline.execute(argv=['scrapy', 'crawl', 'zhihu_user'])
# Cookie login (Cookie登录): reuse a saved session cookie instead of logging in
# -*- coding: utf-8 -*-
import scrapy
from scrapy import cmdline
#from scrapy.spiders import CrawlSpider
import scrapy
from scrapy.contrib.spiders.crawl import CrawlSpider
from astropy.io.fits.header import Header
class ZhihuUserSpider(CrawlSpider):
    """Fetch a Zhihu profile page by reusing a saved session cookie.

    Instead of performing the login flow, the spider attaches the ``z_c0``
    session cookie to its first request, so the server treats it as an
    already-authenticated session.
    """
    name = "zhihu_user1"
    allowed_domains = ['zhihu.com']
    start_urls = ["http://www.zhihu.com"]
    # Browser-like User-Agent so zhihu.com does not reject the requests.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36'
    }
    # NOTE(review): hard-coded session cookies expire — replace z_c0 with a
    # fresh value (copied from a logged-in browser) before running.
    cook = {
        'z_c0':'Mi4wQUhDQ2dzeEhlQXNBQU1JWGNGVnJDeGNBQUFCaEFsVk5NYlAwV0FDS0VaWkppOXI4LWtPZzJ0V3E5MXlhcWh0MTh3|1489839665|2c583ba1ed021db1f404d335d5958102386285c6'
    }

    def start_requests(self):
        """Skip the login flow: send the saved cookie with the first request."""
        return [scrapy.Request(
            'https://www.zhihu.com/people/shuangyueliao-82/activities',
            cookies=self.cook,
            callback=self.after_login,
            headers=self.headers,
        )]

    def after_login(self, response):
        """Dump the fetched profile page for inspection."""
        # print() form is valid on both Python 2 and Python 3 (the original
        # used Python-2-only print statements).
        print('after login')
        print(response.body)
if __name__ == '__main__':
    # Run this spider directly for debugging.  Use the `cmdline` module that
    # is imported at the top of the file, rather than reaching through the
    # `scrapy` package attribute.
    cmdline.execute(argv=['scrapy', 'crawl', 'zhihu_user1'])