图像抓取和处理
概述
一年一度的集五福活动又开始了,即使到最后也就分到几块钱,也不能阻挡大家集五福的兴趣。集五福需要用AR扫描带有福字的东西,我记得之前都是从网上下载各类福字图片去扫,下载起来也挺费劲的,于是今年就直接爬取了百度图片中以“福”、“福气”、“fu”为关键词的图片。后来想到之前学图像处理时接触过很多基本的图像操作,就又动手对图像做了一点处理,使得一个福字变为多个福字。后台回复“福”获取170多张福字图片。创作不易,点个“在看”呗。
图片抓取
图片抓取并没有采用什么框架,就是非常简单的三步:请求URL获取数据、解析数据、数据持久化。在这里就不赘述了,想了解的可以看我之前的推文《几十行代码批量下载高清壁纸 爬虫入门实战》。
实现代码如下
# -*- coding:utf-8 -*-
import requests
import os
import re
import time
import random
def getManyPages(keyword, pages):
    """Fetch several pages of Baidu image-search results for ``keyword``.

    Args:
        keyword: Search term sent with every request.
        pages: Number of result pages to request. Offsets start at 30 and
            advance by 30, so ``pages`` requests are issued in total.

    Returns:
        A list with one element per request: the value stored under the
        ``data`` key of that response's JSON body (``None`` if the key is
        missing).

    Raises:
        requests.exceptions.RequestException: If a request fails or times out.
        ValueError: If a response body is not valid JSON.
    """
    url = 'https://image.baidu.com/search/acjson'
    # The original sent an empty query, so neither the keyword nor the page
    # offset ever reached the server.
    # NOTE(review): the field set below follows the commonly used form of the
    # acjson image-search request; confirm it against the live endpoint.
    params = [
        {
            'tn': 'resultjson_com',
            'ipn': 'rj',
            'ie': 'utf-8',
            'oe': 'utf-8',
            'queryWord': keyword,
            'word': keyword,
            'pn': offset,
            'rn': 30,
        }
        for offset in range(30, 30 * pages + 30, 30)
    ]
    return [
        requests.get(url, params=query, timeout=10).json().get('data')
        for query in params
    ]
def getpage(key, page):
    """Fetch Baidu image-search result pages for ``key`` with random delays.

    Args:
        key: Search term sent with every request.
        page: Index of the last page to fetch. Offsets run from 0 to
            ``page * 30`` in steps of 30, so ``page + 1`` requests are issued.

    Returns:
        A list holding the ``data`` payload of every page that was fetched
        successfully and was non-empty. Failed pages are skipped.
    """
    url = 'https://image.baidu.com/search/acjson'
    # The original sent an empty query, so neither the search term nor the
    # page offset ever reached the server.
    # NOTE(review): the field set below follows the commonly used form of the
    # acjson image-search request; confirm it against the live endpoint.
    new_url = [
        {
            'tn': 'resultjson_com',
            'ipn': 'rj',
            'ie': 'utf-8',
            'oe': 'utf-8',
            'queryWord': key,
            'word': key,
            'pn': offset,
            'rn': 30,
        }
        for offset in range(0, page * 30 + 30, 30)
    ]
    result = []
    for i in new_url:
        # Wait a random 0-3 seconds before each request.
        time.sleep(random.randint(0, 3))
        print(i)
        try:
            data = requests.get(url, params=i, timeout=10).json().get('data')
        except (requests.exceptions.RequestException, ValueError):
            # The request was still rejected (or the body was not JSON):
            # back off for a random 5-10 seconds and move on to the next page.
            time.sleep(random.randint(5, 10))
            continue
        # A missing/empty payload would break callers that iterate each page.
        if data:
            result.append(data)
        print(result)
    return result
def getImg(dataList, localPath, keyword):
    """Download the thumbnail of every search result into ``image/``.

    Args:
        dataList: Iterable of result pages. Each page is an iterable of
            dict-like entries; the image address is read from the entry's
            ``thumbURL`` key. Pages that are ``None`` and entries without a
            ``thumbURL`` are skipped.
        localPath: Not used. Kept so existing callers keep working; files are
            always written to the relative ``image/`` directory.
        keyword: Prefix of the output file names, ``<keyword>_<n>.jpg``.

    Returns:
        None. Images are written to disk as a side effect.
    """
    out_dir = 'image'
    i = 1
    for page in dataList:
        for each in page or ():
            thumb_url = each.get('thumbURL')
            # Without this guard an entry lacking a URL re-wrote the previous
            # image (or raised NameError on the very first entry).
            if thumb_url is None:
                continue
            print('downloading:%s' % thumb_url)
            try:
                pic = requests.get(thumb_url, timeout=10)
            except requests.exceptions.RequestException:
                print('error: This photo cannot be downloaded')
                continue
            # Make sure the target directory exists before the first write.
            os.makedirs(out_dir, exist_ok=True)
            target = 'image/' + keyword + '_' + str(i) + '.jpg'
            with open(target, 'wb') as fp:
                fp.write(pic.content)
            # Only successfully saved images consume a file number.
            i += 1
def dowmloadPic(html, keyword):
    """Download every image whose ``objURL`` appears in ``html``.

    Args:
        html: Text to scan; each ``"objURL":"<address>",`` match is treated
            as one image address.
        keyword: Prefix of the output file names, ``<keyword>_<n>.jpg``,
            written to the relative ``image/`` directory.

    Returns:
        None. Images are written to disk as a side effect.
    """
    pic_url = re.findall(r'"objURL":"(.*?)",', html, re.S)
    i = 1
    # The original message read "Can not find key word" although it is
    # printed right before the downloads start.
    print('Found key word:' + keyword + ', downloading...')
    for each in pic_url:
        print('No ' + str(i) + '.jpg is downloading ,URL:' + str(each))
        try:
            pic = requests.get(each, timeout=10)
        except requests.exceptions.RequestException:
            # Also covers read timeouts, which are not ConnectionError.
            print('error: This photo cannot be downloaded')
            continue
        # Make sure the target directory exists before the first write.
        os.makedirs('image', exist_ok=True)
        target = 'image/' + keyword + '_' + str(i) + '.jpg'
        with open(target, 'wb') as fp:
            fp.write(pic.content)
        # Only successfully saved images consume a file number.
        i += 1
if __name__ == '__main__':
    # Change the keyword to search for different images.
    keyword = '福气'
    # Fetch the result pages (keyword, page count), then save the thumbnails;
    # the second argument of getImg is the path.
    getImg(getpage(keyword, 2), '/images', keyword)
图片处理
在这一部分,我只做了三个最简单的图像处理(其实只是调用了函数而已),基本一两句话就完成了:分别对图像做了“灰度处理”、“像素反转(反色)”和“图片倒置”。代码如下。
import cv2
from matplotlib import pyplot as plt
import numpy as np
# Load one downloaded image and show four variants side by side:
# original, grayscale, colour-inverted and flipped.
imgOgr = cv2.imread('D://python_pycharm//MyPy//image//fu_7.jpg')
# cv2.imread returns None instead of raising when the file cannot be read.
if imgOgr is None:
    raise FileNotFoundError('cannot read image file')
# OpenCV loads images in BGR order; convert to RGB so matplotlib shows the
# original colours.
img = cv2.cvtColor(imgOgr, cv2.COLOR_BGR2RGB)
# Grayscale. The source is still BGR here, so the BGR conversion code is
# required; COLOR_RGB2GRAY would swap the red and blue weights.
img_gray = cv2.cvtColor(imgOgr, cv2.COLOR_BGR2GRAY)
# Colour inversion: subtract every channel value from 255.
img_array = np.asarray(img)
img_re = 255 - img_array
# Flip code -1 flips around both axes, turning the image upside down.
img_r = cv2.flip(img, -1)

# (title, image, colormap) for each cell of the 2x2 grid, in display order.
panels = [
    ('Original', img, None),
    ('Gray', img_gray, 'gray'),
    ('re', img_re, None),
    ('r', img_r, None),
]
for position, (title, picture, cmap) in enumerate(panels, start=1):
    plt.subplot(2, 2, position)
    plt.imshow(picture, cmap=cmap)
    plt.title(title)
    # Hide the axis ticks; only the picture matters.
    plt.xticks([])
    plt.yticks([])
plt.show()
效果展示
1.图片抓取效果
2.图像处理结果