是什么
实现方式
开源
滑块验证码的形式
滑块验证码的形式也很多,大多都类似下面这样的。
一般类似这样的验证码已经有很成熟的解决方案了。
比如 js 破解,图像识别破解等等。
但是我在项目中遇到的验证码有一点不同,是下面这种验证码(拼夕夕的验证码)。可以看到它的背景更复杂,而且使用了两个缺口,两个缺口的形状还不一样,因此此类验证码识别起来更加困难。
采用的方法
对该类新型验证码进行分析后,最开始准备使用深度学习的方法,大力出奇迹,但是有一个现实问题:没多少数据。
公司目前收集的也只有六七十张,于是只好放弃深度学习,采用传统方法opencv来做。
使用opencv的思路是:获取滑块图和背景图,进行模板匹配,将匹配度最高的位置作为结果输出。
1.滑块图、背景图切分
想要拿到干净的滑块图和背景图需要进行js破解,但这个破解比较耗时,一张验证码大概需要30s,而30s后验证码已经失效了,所以我只能手动切图。
下面是切图代码:
[py]
1
2
3
4
5
6
7
def crop(img_name, split_x=139, height=362):
    """Split a captcha screenshot into the background and the slider piece.

    The left ``split_x`` columns are returned as the slider piece and the
    columns to the right of them as the background; both crops keep only the
    top ``height`` rows.

    :param img_name: path of the screenshot, read with ``cv2.imread``
    :param split_x: column where the slider strip ends and the background begins
    :param height: number of rows, counted from the top, kept in both crops
    :return: ``(bg, part)`` -- the background crop and the slider-piece crop
    :raises FileNotFoundError: if the image cannot be read
    """
    img = cv2.imread(img_name)
    if img is None:
        # cv2.imread reports an unreadable file by returning None, not by raising.
        raise FileNotFoundError("cannot read image: {}".format(img_name))
    # Array slicing order is [y0:y1, x0:x1].
    part = img[0:height, 0:split_x]
    bg = img[0:height, split_x:]
    return bg, part
切好的滑块图和背景图
图片切好过后就可以进行模板匹配了
模板匹配
对切好的图片先进行轮廓提取,然后再使用模板匹配,其中轮廓提取使用了Canny算子:
[py]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
def detect_captcha_gap(bg, tp):
    """Locate the gap in the background that matches the slider piece.

    Both images are reduced to Canny edge maps, and the slider's edge map is
    matched against the background's with ``cv2.TM_CCOEFF_NORMED`` template
    matching; the highest-scoring position is taken as the gap.

    Side effect: a copy of the background with the matched region outlined is
    written to ``result_new.png`` in the current directory.

    :param bg: background image array
    :param tp: slider-piece image array
    :return: x coordinate of the matched region's top-left corner inside ``bg``
    """
    # Edge extraction makes the match depend on contours rather than colours.
    bg_edge = cv2.Canny(bg, 100, 200)
    tp_edge = cv2.Canny(tp, 100, 200)

    # Convert the single-channel edge maps back to three channels.
    bg_pic = cv2.cvtColor(bg_edge, cv2.COLOR_GRAY2RGB)
    tp_pic = cv2.cvtColor(tp_edge, cv2.COLOR_GRAY2RGB)

    # Slide the template over the background and keep the best match.
    res = cv2.matchTemplate(bg_pic, tp_pic, cv2.TM_CCOEFF_NORMED)
    _, _, _, max_loc = cv2.minMaxLoc(res)

    th, tw = tp_pic.shape[:2]
    tl = max_loc  # top-left corner of the best match
    br = (tl[0] + tw, tl[1] + th)  # bottom-right corner

    # Draw on a copy so the caller's image (possibly a slice of a larger
    # screenshot) is not modified by the debug overlay.
    annotated = bg.copy()
    cv2.rectangle(annotated, tl, br, (0, 0, 255), 2)
    cv2.imwrite("result_new.png", annotated)

    return tl[0]
下面是Canny算子提取的轮廓图,可以发现效果还是不错,能够看到两幅图中的相似轮廓。
目前已经能够正确匹配出滑块对应缺口的位置了。
滑块破解
bilibili 破解
[py]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
# -*- coding:utf-8 -*-
import os
import random
import re
import time
from urllib.request import urlretrieve

from bs4 import BeautifulSoup
from PIL import Image
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
class CrackGeetest():
    """Drive Chrome through the Geetest slider captcha on the bilibili login page.

    Workflow: download the two scrambled captcha images (with and without the
    gap), reassemble them, find the first column where they differ, build a
    drag trajectory for that distance and replay it on the slider knob.
    """

    def __init__(self):
        self.url = 'https://passport.bilibili.com/login'
        self.browser = webdriver.Chrome()
        # Shared explicit wait with a 10 second timeout.
        self.wait = WebDriverWait(self.browser, 10)

    def mk_img_dir(self):
        """Create the ``Image`` directory used for downloaded and merged images."""
        os.makedirs('Image', exist_ok=True)

    @staticmethod
    def _parse_positions(slices):
        """Return each slice's CSS background-position as a ``{'x', 'y'}`` dict."""
        positions = []
        for each in slices:
            x, y = re.findall(r'background-position: (.*)px (.*)px;', each.get('style'))[0]
            positions.append({'x': int(x), 'y': int(y)})
        return positions

    def get_geetest_image(self):
        """Download the captcha images and read the slice layout from the page.

        Polls the page source until both the gapped (``gt_cut_bg_slice``) and
        the full (``gt_cut_fullbg_slice``) slice elements are present, saves
        the two images as ``Image/bg.jpg`` and ``Image/fullgb.jpg``, and
        returns the background-position offsets of every slice.

        :return: ``(bg_location_list, fullgb_location_list)``, each a list of
            ``{'x': int, 'y': int}`` dicts in page order
        """
        while True:
            soup = BeautifulSoup(self.browser.page_source, 'lxml')
            bg = soup.find_all('div', class_='gt_cut_bg_slice')
            fullgb = soup.find_all('div', class_='gt_cut_fullbg_slice')
            # Both lists are indexed below, so both must be non-empty.
            if bg and fullgb:
                break
            # Give the page time to render instead of spinning on page_source.
            time.sleep(0.2)

        bg_url = re.findall(r'url\("(.*?)"\);', bg[0].get('style'))[0].replace('webp', 'jpg')
        fullgb_url = re.findall(r'url\("(.*?)"\);', fullgb[0].get('style'))[0].replace('webp', 'jpg')

        bg_location_list = self._parse_positions(bg)
        fullgb_location_list = self._parse_positions(fullgb)

        self.mk_img_dir()
        urlretrieve(url=bg_url, filename='Image/bg.jpg')
        print('缺口图片下载完成!')
        urlretrieve(url=fullgb_url, filename='Image/fullgb.jpg')
        print('背景图片下载完成!')
        return bg_location_list, fullgb_location_list

    def get_merge_image(self, filename, location_list):
        """Reassemble a scrambled captcha image from its slice offsets.

        The source image is cut into 10-pixel-wide, 58-pixel-high strips.
        Strips whose ``y`` offset is -58 are taken from the lower half of the
        source and laid out left to right along the top of the result; strips
        whose ``y`` offset is 0 are taken from the upper half and laid out
        along the bottom. The result is also saved next to the source under
        ``Image/`` with ``1`` appended to the base name.

        :param filename: path of the scrambled image
        :param location_list: slice offsets as returned by ``get_geetest_image``
        :return: the merged 260x116 ``PIL.Image`` object
        """
        im = Image.open(filename)
        new_im = Image.new('RGB', (260, 116))
        im_list_upper = []
        im_list_lower = []
        for location in location_list:
            left = abs(location['x'])
            if location['y'] == -58:
                im_list_upper.append(im.crop((left, 58, left + 10, 116)))
            if location['y'] == 0:
                im_list_lower.append(im.crop((left, 0, left + 10, 58)))

        x_offset = 0
        for img in im_list_upper:
            new_im.paste(img, (x_offset, 0))
            x_offset += img.size[0]

        x_offset = 0
        for img in im_list_lower:
            new_im.paste(img, (x_offset, 58))
            x_offset += img.size[0]

        new_im.save('Image/' + re.split('[./]', filename)[1] + '1.jpg')
        return new_im

    def is_px_equal(self, img1, img2, x, y):
        """Tell whether two images have approximately the same colour at a pixel.

        :param img1: first image
        :param img2: second image
        :param x: pixel column
        :param y: pixel row
        :return: True when each of the first three channels differs by less than 60
        """
        pix1 = img1.load()[x, y]
        pix2 = img2.load()[x, y]
        threshold = 60
        return all(abs(pix1[c] - pix2[c]) < threshold for c in range(3))

    def get_gap(self, img1, img2):
        """Find the x offset of the gap by comparing the two merged images.

        Columns are scanned left to right starting at x = 60; the first column
        containing a pixel that differs between the images is returned.

        :param img1: image without the gap
        :param img2: image with the gap
        :return: column of the first differing pixel, or 60 if none differs
        """
        left = 60
        for i in range(left, img1.size[0]):
            for j in range(img1.size[1]):
                if not self.is_px_equal(img1, img2, i, j):
                    return i
        return left

    def get_track(self, distance):
        """Build a list of per-step x displacements covering ``distance``.

        Motion is simulated in 0.2 s steps: random acceleration in [2, 5]
        while the position is below 4/5 of the distance, then random
        deceleration in [12.5, 13.5]. Steps are produced until the simulated
        position reaches ``distance``; individual steps may be negative when
        the velocity drops below zero.

        :param distance: total offset to cover
        :return: list of step displacements rounded to two decimals
        """
        tracks = []
        current = 0  # simulated position so far
        mid = distance * 4 / 5  # switch from accelerating to decelerating here
        t = 0.2  # duration of one step
        v = 0  # current velocity
        while current < distance:
            if current < mid:
                a = random.uniform(2, 5)
            else:
                a = -(random.uniform(12.5, 13.5))
            v0 = v
            v = v0 + a * t
            x = v0 * t + 1 / 2 * a * t * t
            current += x
            # Trim the step when it overshoots the target by a small amount.
            overshoot = current - distance
            if 0.6 < overshoot < 1:
                x -= 0.53
            elif 1 < overshoot < 1.5:
                x -= 1.4
            elif 1.5 < overshoot < 3:
                x -= 1.8
            tracks.append(round(x, 2))
        return tracks

    def get_slider(self):
        """Wait for the slider knob to become clickable.

        :return: the slider element, or None if it did not become clickable
            before the wait timed out
        """
        try:
            return self.wait.until(EC.element_to_be_clickable(
                (By.XPATH, '//div[@class="gt_slider"]/div[contains(@class,"gt_slider_knob")]')))
        except TimeoutException:
            # WebDriverWait raises Selenium's TimeoutException, not the builtin TimeoutError.
            print('加载超时...')
            return None

    def move_to_gap(self, slider, tracks):
        """Drag the slider along the given trajectory and release it.

        :param slider: slider element to press and hold
        :param tracks: per-step x displacements, as produced by ``get_track``
        """
        action = ActionChains(self.browser)
        action.click_and_hold(slider).perform()
        for x in tracks:
            action.move_by_offset(xoffset=x, yoffset=-1).perform()
            # Start a fresh chain so already-performed moves are not queued again.
            action = ActionChains(self.browser)
        time.sleep(0.6)
        action.release().perform()

    def success_check(self):
        """Report whether the page shows the Geetest success marker.

        The browser window is closed afterwards in every case.

        :return: True if ``gt_success`` appears in the page source, False if it
            does not, None if reading the page timed out
        """
        try:
            if 'gt_success' in self.browser.page_source:
                print('验证成功!')
                return True
            print('验证失败!')
            return False
        except TimeoutException:
            print('加载超时...')
        finally:
            self.browser.close()
if __name__ == '__main__':
    import traceback

    # Retry with a fresh browser session until the captcha is accepted.
    try:
        while True:
            check = CrackGeetest()
            check.browser.get(check.url)
            bg_location_list, fullgb_location_list = check.get_geetest_image()
            img1 = check.get_merge_image('Image/fullgb.jpg', fullgb_location_list)
            img2 = check.get_merge_image('Image/bg.jpg', bg_location_list)
            # The scale factor on the measured gap should be tuned for the actual page.
            distance = check.get_gap(img1, img2) * 1.138
            slider = check.get_slider()
            tracks = check.get_track(distance)
            check.move_to_gap(slider, tracks)
            time.sleep(0.5)
            if check.success_check():
                break
    except Exception:
        print('程序出错啦!')
        # Show the cause as well; the fixed message alone hides what failed.
        traceback.print_exc()
qq 的滑块破解
[py]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import time

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver import ActionChains  # mouse drag actions
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.support.ui import WebDriverWait

# Log in to QQ mail and keep dragging the captcha slider until the captcha's
# reload button no longer shows up.
# NOTE(review): the driver path and the account/password below are hard-coded
# in the script; confirm they are placeholders before sharing or reusing it.
driver = webdriver.Chrome(executable_path="../chromedriver.exe")
url = "https://mail.qq.com/"
driver.get(url)
time.sleep(3)

# The login form lives inside its own frame.
driver.switch_to.frame("login_frame")
driver.find_element(By.ID, "u").send_keys("1428515626")
time.sleep(0.5)
driver.find_element(By.ID, "p").send_keys("cdsfds2121")
time.sleep(1)

# Wait until the login button can be clicked, then click it.
login_button = WebDriverWait(driver, 10).until(ec.element_to_be_clickable((By.ID, "login_button")))
login_button.click()
time.sleep(2)

# The slider captcha is rendered in yet another frame.
driver.switch_to.frame("tcaptcha_iframe")
time.sleep(0.4)

distance = 190
while True:
    slider = WebDriverWait(driver, 5).until(ec.element_to_be_clickable((By.ID, "tcaptcha_drag_thumb")))
    actions = ActionChains(driver)
    actions.click_and_hold(slider)  # press and hold the knob
    actions.pause(0.3)
    actions.move_by_offset(distance + 20, 0)  # overshoot the target...
    actions.pause(0.15)
    actions.move_by_offset(-35, 0)  # ...then pull back
    actions.pause(0.6)
    actions.release()
    actions.perform()
    time.sleep(2)
    try:
        # If the reload button is still there, refresh the captcha and retry.
        reload_button = WebDriverWait(driver, 1).until(ec.presence_of_element_located((By.ID, "e_reload")))
        reload_button.click()
        time.sleep(1)
    except WebDriverException:
        # Reload button missing or not clickable: stop retrying and close the
        # browser (presumably the captcha was accepted -- verify on the page).
        driver.quit()
        break
小结
希望本文对你有所帮助,如果喜欢,欢迎点赞收藏转发一波。
我是老马,期待与你的下次重逢。