Python爬虫学习笔记之极验滑动验证码的识别

代码:

  1 import time
  2 from io import BytesIO
  3 from PIL import Image
  4 from selenium import webdriver
  5 from selenium.webdriver import ActionChains
  6 from selenium.webdriver.common.by import By
  7 from selenium.webdriver.support.ui import WebDriverWait
  8 from selenium.webdriver.support import expected_conditions as EC
  9 
 10 EMAIL = '' # 邮箱 密码需要自己注册
 11 PASSWORD = ''
 12 BORDER = 6
 13 INIT_LEFT = 60
 14 
 15 
 16 class CrackGeetest():
 17     def __init__(self):
 18         self.url = 'https://account.geetest.com/login'
 19         self.browser = webdriver.Chrome()
 20         self.wait = WebDriverWait(self.browser, 20)
 21         self.email = EMAIL
 22         self.password = PASSWORD
 23     
 24     def __del__(self):
 25         self.browser.close()
 26     
 27     def get_geetest_button(self):
 28         """
 29         获取初始验证按钮
 30         :return:
 31         """
 32         button = self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'geetest_radar_tip')))
 33         return button
 34     
 35     def get_position(self):
 36         """
 37         获取验证码位置
 38         :return: 验证码位置元组
 39         """
 40         img = self.wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'geetest_canvas_img')))
 41         time.sleep(2)
 42         location = img.location
 43         size = img.size
 44         top, bottom, left, right = location['y'], location['y'] + size['height'], location['x'], location['x'] + size[
 45             'width']
 46         return (top, bottom, left, right)
 47     
 48     def get_screenshot(self):
 49         """
 50         获取网页截图
 51         :return: 截图对象
 52         """
 53         screenshot = self.browser.get_screenshot_as_png()
 54         screenshot = Image.open(BytesIO(screenshot))
 55         return screenshot
 56     
 57     def get_slider(self):
 58         """
 59         获取滑块
 60         :return: 滑块对象
 61         """
 62         slider = self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'geetest_slider_button')))
 63         return slider
 64     
 65     def get_geetest_image(self, name='captcha.png'):
 66         """
 67         获取验证码图片
 68         :return: 图片对象
 69         """
 70         top, bottom, left, right = self.get_position()
 71         print('验证码位置', top, bottom, left, right)
 72         screenshot = self.get_screenshot()
 73         captcha = screenshot.crop((left, top, right, bottom))
 74         captcha.save(name)
 75         return captcha
 76     
 77     def open(self):
 78         """
 79         打开网页输入用户名密码
 80         :return: None
 81         """
 82         self.browser.get(self.url)
 83         email = self.wait.until(EC.presence_of_element_located((By.ID, 'email')))
 84         password = self.wait.until(EC.presence_of_element_located((By.ID, 'password')))
 85         email.send_keys(self.email)
 86         password.send_keys(self.password)
 87     
 88     def get_gap(self, image1, image2):
 89         """
 90         获取缺口偏移量
 91         :param image1: 不带缺口图片
 92         :param image2: 带缺口图片
 93         :return:
 94         """
 95         left = 60
 96         for i in range(left, image1.size[0]):
 97             for j in range(image1.size[1]):
 98                 if not self.is_pixel_equal(image1, image2, i, j):
 99                     left = i
100                     return left
101         return left
102     
103     def is_pixel_equal(self, image1, image2, x, y):
104         """
105         判断两个像素是否相同
106         :param image1: 图片1
107         :param image2: 图片2
108         :param x: 位置x
109         :param y: 位置y
110         :return: 像素是否相同
111         """
112         # 取两个图片的像素点
113         pixel1 = image1.load()[x, y]
114         pixel2 = image2.load()[x, y]
115         threshold = 60
116         if abs(pixel1[0] - pixel2[0]) < threshold and abs(pixel1[1] - pixel2[1]) < threshold and abs(
117                 pixel1[2] - pixel2[2]) < threshold:
118             return True
119         else:
120             return False
121     
122     def get_track(self, distance):
123         """
124         根据偏移量获取移动轨迹
125         :param distance: 偏移量
126         :return: 移动轨迹
127         """
128         # 移动轨迹
129         track = []
130         # 当前位移
131         current = 0
132         # 减速阈值
133         mid = distance * 4 / 5
134         # 计算间隔
135         t = 0.2
136         # 初速度
137         v = 0
138         
139         while current < distance:
140             if current < mid:
141                 # 加速度为正2
142                 a = 2
143             else:
144                 # 加速度为负3
145                 a = -3
146             # 初速度v0
147             v0 = v
148             # 当前速度v = v0 + at
149             v = v0 + a * t
150             # 移动距离x = v0t + 1/2 * a * t^2
151             move = v0 * t + 1 / 2 * a * t * t
152             # 当前位移
153             current += move
154             # 加入轨迹
155             track.append(round(move))
156         return track
157     
158     def move_to_gap(self, slider, track):
159         """
160         拖动滑块到缺口处
161         :param slider: 滑块
162         :param track: 轨迹
163         :return:
164         """
165         ActionChains(self.browser).click_and_hold(slider).perform()
166         for x in track:
167             ActionChains(self.browser).move_by_offset(xoffset=x, yoffset=0).perform()
168         time.sleep(0.5)
169         ActionChains(self.browser).release().perform()
170     
171     def login(self):
172         """
173         登录
174         :return: None
175         """
176         submit = self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'login-btn')))
177         submit.click()
178         time.sleep(10)
179         print('登录成功')
180     
181     def crack(self):
182         # 输入用户名密码
183         self.open()
184         # 点击验证按钮
185         button = self.get_geetest_button()
186         button.click()
187         # 获取验证码图片
188         image1 = self.get_geetest_image('captcha1.png')
189         # 点按呼出缺口
190         slider = self.get_slider()
191         slider.click()
192         # 获取带缺口的验证码图片
193         image2 = self.get_geetest_image('captcha2.png')
194         # 获取缺口位置
195         gap = self.get_gap(image1, image2)
196         print('缺口位置', gap)
197         # 减去缺口位移
198         gap -= BORDER
199         # 获取移动轨迹
200         track = self.get_track(gap)
201         print('滑动轨迹', track)
202         # 拖动滑块
203         self.move_to_gap(slider, track)
204         
205         success = self.wait.until(
206             EC.text_to_be_present_in_element((By.CLASS_NAME, 'geetest_success_radar_tip_content'), '验证成功'))
207         print(success)
208         
209         # 失败后重试
210         if not success:
211             self.crack()
212         else:
213             self.login()
214 
215 
if __name__ == "__main__":
    # Script entry point: build the cracker and run the captcha + login flow.
    cracker = CrackGeetest()
    cracker.crack()

猜你喜欢

转载自www.cnblogs.com/Trojan00/p/9501653.html