潭州课堂25班:Ph201805201 爬虫基础 第十一课 点触验证码 (课堂笔记)

打开网易盾体验页 http://dun.163.com/trial/picture-click ,依次选择「在线体验」→「图中点选」。

打码平台 ——超级鹰    http://www.chaojiying.com/

 网易盾  抓取验证码图片

# -*- coding: utf-8 -*-
# 斌彬电脑
# @Time : 2018/9/13 0013 5:27

from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import requests,re
from PIL import Image
from io import BytesIO      # 不写入磁盘,显示图片文件
import time

class WanYy():
    """Drive Chrome through the NetEase Yidun "picture-click" demo and capture the captcha image.

    Calling an instance runs the whole flow: open the demo page, scroll down,
    click the control that reveals the captcha, screenshot the browser and
    crop the captcha area out of the screenshot.
    """

    # Demo page hosting the click-captcha trial.
    START_URL = 'http://dun.163.com/trial/picture-click'
    # Pixels the page is scrolled down after loading; the same amount is
    # subtracted from the element's y position when cropping the screenshot.
    SCROLL_Y = 500
    # (width, height) in pixels of the region cut out of the screenshot.
    CAPTCHA_SIZE = (310, 210)
    # Absolute XPath of the control that is clicked to reveal the captcha.
    BUTTON_XPATH = '/html/body/main/div/div/div[2]/div[2]/div[2]/div/div[2]/div[3]/div/div/div[2]/div[3]/span[2]'
    # Absolute XPath of the captcha <img> element.
    IMAGE_XPATH = '/html/body/main/div/div/div[2]/div[2]/div[2]/div/div[2]/div[3]/div/div/div[1]/div/div[1]/img[1]'

    def __init__(self, user, pas):
        """Start a Chrome session with a fixed window size.

        Args:
            user: account name; stored on the instance but not used by the
                methods of this class.
            pas: password; stored on the instance but not used by the
                methods of this class.
        """
        self.user = user
        self.pas = pas
        # Browser options: a fixed window size keeps screenshot geometry stable.
        options = Options()
        options.add_argument('--window-size=1366,768')
        # `options=` replaces the deprecated `chrome_options=` keyword.
        self.dri = webdriver.Chrome(options=options)
        # Explicit wait with a 10 second limit, shared by all element lookups.
        self.wait = WebDriverWait(self.dri, 10)

    def get_start(self):
        """Load the demo page and scroll down by SCROLL_Y pixels."""
        self.dri.get(self.START_URL)
        self.dri.execute_script('window.scrollTo(0, %d)' % self.SCROLL_Y)

    @staticmethod
    def _crop_box(location, scroll_y, size):
        """Return the (left, upper, right, lower) screenshot box for an element.

        Args:
            location: mapping with the element's 'x' and 'y' coordinates.
            scroll_y: vertical scroll offset to subtract from 'y'.
            size: (width, height) of the region to cut out.
        """
        width, height = size
        left = location['x']
        top = location['y'] - scroll_y
        return (left, top, left + width, top + height)

    def get_image(self):
        """Reveal the captcha and return it as PNG-encoded bytes.

        Returns:
            bytes: the cropped captcha region of the browser screenshot.
        """
        # Click the control that makes the captcha appear.
        self.wait.until(EC.presence_of_element_located((By.XPATH, self.BUTTON_XPATH))).click()
        # Wait until the captcha image is visible instead of sleeping a fixed time.
        element = self.wait.until(EC.visibility_of_element_located((By.XPATH, self.IMAGE_XPATH)))
        # Keep the screenshot in memory; nothing is written to disk.
        screenshot = Image.open(BytesIO(self.dri.get_screenshot_as_png()))
        # NOTE(review): the crop assumes the page really scrolled SCROLL_Y pixels
        # and that screenshot pixels map 1:1 to page coordinates - confirm on
        # high-DPI displays.
        captcha = screenshot.crop(self._crop_box(element.location, self.SCROLL_Y, self.CAPTCHA_SIZE))
        buffer = BytesIO()
        captcha.save(buffer, format='png')
        return buffer.getvalue()

    def __call__(self, *args, **kwargs):
        """Run the full flow and return the captcha PNG bytes.

        The browser is shut down even when a step raises, so no Chrome or
        chromedriver process is left behind.
        """
        try:
            self.get_start()
            image = self.get_image()
            # Keep the window open briefly so the result can be observed.
            time.sleep(5)
            return image
        finally:
            # quit() ends the whole driver session, not just the current window.
            self.dri.quit()


if __name__ == '__main__':
    # NOTE: 账号 (account) and 密码 (password) are placeholders that are not
    # defined anywhere in this script; replace them with real values before
    # running, otherwise this line raises NameError.
    yedun = WanYy(账号, 密码)
    # Calling the instance runs the whole capture flow via WanYy.__call__.
    yedun()

  

超级鹰  验证码读取

# -*- coding: utf-8 -*-
# 斌彬电脑
# @Time : 2018/9/13 0013 5:04

#!/usr/bin/env python
# coding:utf-8

import requests
from hashlib import md5

class Chaojiying_Client(object):
    """Small HTTP client for the Chaojiying captcha-solving service.

    Every request carries the account name, the MD5 hex digest of the
    password and the software ID as form fields.
    """

    def __init__(self, username, password, soft_id, timeout=60):
        """Store the credentials and default request settings.

        Args:
            username: account name, sent as the 'user' form field.
            password: plain-text password; only its MD5 hex digest is kept
                and sent (as 'pass2').
            soft_id: software ID, sent as the 'softid' form field.
            timeout: seconds passed to requests as the request timeout, so a
                stalled connection cannot block the caller forever.
        """
        self.username = username
        self.password = md5(password.encode('utf8')).hexdigest()
        self.soft_id = soft_id
        self.timeout = timeout
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def _post(self, url, params, files=None):
        """POST params merged with the account fields and return the decoded JSON body."""
        data = dict(params)
        # Account fields are applied last, so they win on a key clash.
        data.update(self.base_params)
        r = requests.post(url, data=data, files=files, headers=self.headers, timeout=self.timeout)
        return r.json()

    def PostPic(self, im, codetype):
        """Upload a captcha image for solving.

        Args:
            im: image bytes.
            codetype: captcha type code, see http://www.chaojiying.com/price.html

        Returns:
            The service's JSON response, decoded.
        """
        files = {'userfile': ('ccc.jpg', im)}
        return self._post(
            'http://upload.chaojiying.net/Upload/Processing.php',
            {'codetype': codetype},
            files=files,
        )

    def ReportError(self, im_id):
        """Report a wrongly solved captcha.

        Args:
            im_id: image ID of the captcha being reported.

        Returns:
            The service's JSON response, decoded.
        """
        return self._post(
            'http://upload.chaojiying.net/Upload/ReportError.php',
            {'id': im_id},
        )


if __name__ == '__main__':
    # NOTE: 账号 (account) and 密码 (password) are placeholders that are not
    # defined in this script; replace them with real credentials. The third
    # argument is the software ID: generate your own under
    # "User Center >> Software ID" on the Chaojiying site and substitute it.
    chaojiying = Chaojiying_Client(账号, 密码, '897271')
    # Replace a.jpg with the path of the local captcha image (the original
    # note says Windows paths sometimes need doubled separators).
    with open('a.jpg', 'rb') as f:
        im = f.read()
    # 1902 is the captcha type code; see "Price list" on the official site.
    # print() must wrap the whole call so the service response is printed.
    print(chaojiying.PostPic(im, 1902))

  

猜你喜欢

转载自www.cnblogs.com/gdwz922/p/9638391.html
今日推荐