# -*- coding:utf-8 -*-
"""Scrapy spider that collects loan and investment records from mdjr.com.

Every POST body sent to the site is produced by the site's own JavaScript
``encrypt`` routine, which is loaded from a local text file and executed
through PyExecJS.
"""
import functools
import json

import execjs
import scrapy

from p2pinvestment.utils import transform_capital, transform_term_of_investment, transform_rate, transform_time, \
    transfrom_phone, transform_time1

# Endpoint returning one page of the loan list.
LIST_URL = 'https://www.mdjr.com/service3/miaodai.loans.search'
# Endpoint returning the details of a single loan.
POST_URL = 'https://www.mdjr.com/service3/miaodai.loan.get'
# Endpoint returning one page of investment records for a loan.
PHONE_URL = 'https://www.mdjr.com/service3/loan.invests.search'

# JavaScript source that exposes the ``encrypt`` function.
# NOTE(review): absolute, machine-specific path; the module cannot be
# imported on a host that lacks this file.
with open(r'E:\p2pinvestment\p2pinvestment\js\mdjr.txt', 'r', encoding='utf-8') as js_file:
    file = js_file.read()


@functools.lru_cache(maxsize=1)
def _js_context():
    """Return the compiled JavaScript context, compiling it on first use only."""
    return execjs.get().compile(file)


def crack_ciphertext(page, id_number):
    """Build the form fields for a request by calling the JavaScript ``encrypt``.

    Args:
        page: Page number handed to ``encrypt``.
        id_number: Loan id handed to ``encrypt``; the list requests pass ``''``.

    Returns:
        dict: Field name -> value, parsed from the ``k=v&k=v`` string that
        ``encrypt`` returns.

    Raises:
        ValueError: If a fragment of the returned string contains no ``=``.
    """
    payload = _js_context().call('encrypt', page, id_number)
    # Split on the first '=' only, so a value that itself contains '='
    # stays intact instead of breaking the dict construction.
    return dict(pair.split('=', 1) for pair in payload.split('&'))


class MdjrSpider(scrapy.Spider):
    """Crawl loan list -> loan detail -> paginated investment records."""

    name = 'mdjr'
    company_name = '秒贷金融'
    custom_settings = {
        'CONCURRENT_REQUESTS': 5,
        'DOWNLOAD_DELAY': 0.25,
        'DEFAULT_REQUEST_HEADERS': {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                          '(KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        },
    }

    def start_requests(self):
        """Request the first page of the loan list."""
        yield scrapy.FormRequest(
            url=LIST_URL,
            method='POST',
            formdata=crack_ciphertext(1, ''),
            callback=self.parse_target_list,
            meta={'page': 1},
        )

    def parse_target_list(self, response):
        """Yield one detail request per listed loan, then the next list page.

        Reads ``page.pages`` and ``page.list`` from the JSON body; each list
        entry supplies ``title`` and ``id``.
        """
        page = response.meta['page']
        listing = json.loads(response.text)['page']
        pages = listing['pages']

        for loan in listing['list']:
            id_number = loan['id']
            # Scrapy schedules higher priority values first, so detail
            # requests (2) run ahead of further list pages (1).
            yield scrapy.FormRequest(
                url=POST_URL,
                formdata=crack_ciphertext(page, id_number),
                callback=self.parse_pagecode,
                meta={'title': loan['title'], 'id_number': id_number},
                priority=2,
            )

        if page < int(pages):
            page += 1
            yield scrapy.FormRequest(
                url=LIST_URL,
                method="POST",
                formdata=crack_ciphertext(page, ''),
                callback=self.parse_target_list,
                meta={'page': page},
                priority=1,
            )

    def parse_pagecode(self, response):
        """Build the loan-level item and request page 1 of its investment records.

        The fields are read from the ``loan`` object of the JSON body and
        passed through the ``p2pinvestment.utils`` helpers.
        """
        loan = json.loads(response.text)['loan']
        # Loan types containing 'TYBD' get a day suffix on the term; every
        # other type gets a month suffix.
        unit = '天' if 'TYBD' in loan['loanType'] else '月'

        item = {
            'RatePercentage': transform_rate(loan['baseRate']),
            'Capital': transform_capital(loan['borrowAmount']),
            'TermOfInvestment': transform_term_of_investment(str(loan['debitTerm']) + unit),
            'TargetStartTime': transform_time1(loan['openTime']),
            'TargetName': response.meta['title'],
            # NOTE(review): called without an argument; presumably yields the
            # current time -- confirm against p2pinvestment.utils.
            'TargetEndTime': transform_time(),
            'CompanyName': self.company_name,
        }

        id_number = response.meta['id_number']
        page = 1
        yield scrapy.FormRequest(
            url=PHONE_URL,
            formdata=crack_ciphertext(page, id_number),
            meta={'page': page, 'id_number': id_number, 'item': item},
            callback=self.parse_phone,
            priority=3,
        )

    def parse_phone(self, respones):
        """Yield one item per investment record, then request the next page.

        The parameter name ``respones`` (sic) is kept for backward
        compatibility. Each record contributes ``TargetCapital``, ``Time``
        and ``PhoneNumber`` on top of the loan-level item carried in
        ``meta['item']``.
        """
        text = json.loads(respones.text)
        base_item = respones.meta['item']
        pages = text['page']['lastPage']

        for record in text['page']['list']:
            # Copy per record: yielding one shared dict would make every
            # emitted item alias the same object, so a later record would
            # overwrite the fields of the earlier ones.
            investment = base_item.copy()
            investment['TargetCapital'] = transform_capital(record['amount'])
            investment['Time'] = transform_time(record['confirmProcessTime'])
            investment['PhoneNumber'] = transfrom_phone(record['secureMobile'])
            yield investment

        page = respones.meta['page']
        # int() mirrors parse_target_list, so a page count delivered as a
        # string still compares correctly.
        if page < int(pages):
            page += 1
            id_number = respones.meta['id_number']
            yield scrapy.FormRequest(
                url=PHONE_URL,
                formdata=crack_ciphertext(page, id_number),
                meta={'page': page, 'id_number': id_number, 'item': base_item},
                callback=self.parse_phone,
                priority=4,
            )
# python伪代码之爬取秒贷金融p2p信息运行代码持续更新:【内向即废物--沈吟秋】
# 猜你喜欢
# 转载自blog.csdn.net/qq_37995231/article/details/79209155
# 今日推荐
# 周排行