scrapy--模拟登录的三种方式

1.在头文件中带上登录后的cookies值

在spider文件中

name = '爬虫名字'

allowed_domains = [' ']

start_urls = (  )

cookies = {  }

。。。。。。(一些获取html的函数)

最后调用回调函数

def start_requests(self):
    """Issue the initial requests using pre-captured login cookies.

    Yields one request per URL in ``self.start_urls``, attaching the
    saved session cookies so every page is fetched as the logged-in user.
    """
    for url in self.start_urls:
        # A plain GET carrying cookies is all that is needed here.
        # FormRequest is meant for form POSTs; with no formdata supplied
        # it adds nothing, so use the ordinary Request class.
        yield scrapy.Request(url, cookies=self.cookies, callback=self.parse_page)
 
 

2.start_requests(self),从登录界面开始爬取,带上账号和密码登录,回调函数处理的是登录后的主页面

def start_requests(self):
    """Log in by POSTing the account credentials to the login endpoint.

    The login response is handed to ``parse_page`` for further handling.
    """
    login_url = 'http://www.renren.com/PLogin.do'
    credentials = {"email" : "[email protected]", "password" : "alarmchime"}
    yield scrapy.FormRequest(
        url=login_url,
        formdata=credentials,
        callback=self.parse_page,
    )

def parse_page(self, response):
    """Dump the fetched page body to ``mao2.html`` for inspection.

    ``response.body`` is ``bytes``, so the file must be opened in binary
    mode; the original text mode ("w") raises TypeError on Python 3.
    """
    with open("mao2.html", "wb") as outfile:
        outfile.write(response.body)
3. 向登录界面发起一个请求获得隐藏字段
start_urls = (
    "http://www.renren.com/PLogin.do",
)

def parse(self, response):
    """Extract the hidden anti-forgery token from the login page, then
    submit the login form with the credentials plus that token.

    ``FormRequest.from_response`` pre-fills the form found in the login
    page and merges ``formdata`` over it.
    """
    # Pull the hidden field out of the login page.
    # NOTE(review): the original XPath "//_xsrf" is unlikely to match a
    # real element; it is usually //input[@name="_xsrf"]/@value — confirm.
    _xsrf = response.xpath("//_xsrf").extract()[0]
    yield scrapy.FormRequest.from_response(
        response,
        # Fixed: the original `"_xsrf" = _xsrf},` was a syntax error
        # (assignment inside a dict literal, mismatched braces).
        formdata={
            "email": "[email protected]",
            "password": "alarmchime",
            "_xsrf": _xsrf,
        },
        callback=self.parse_page,
    )
 


猜你喜欢

转载自blog.csdn.net/huangmengfeng/article/details/80030079