爬取我主良缘,获取个人图片及其信息

网站:http://www.lovewzly.com/jiaoyou.html

工具:Python3、PyCharm、火狐浏览器自带的F12开发者工具

目标:获取妹子信息,将图片与个人信息分开保存在不同文件夹下

步骤:分为以下四步:

  1. 设置条件
  2. 解析网页
  3. 下载图片
  4. 保存信息
# coding=utf-8
import os
import requests

#设置条件
def quary_age():
    """Prompt for the desired age and map it to an age bracket.

    Returns:
        A ``(startage, endage)`` tuple: (21, 30), (31, 40) or (41, 50) when
        the entered age falls inside one of those inclusive brackets, and
        (0, 0) for every other age.

    Raises:
        ValueError: if the typed text is not an integer.
    """
    age = int(input("请输入期望对象年龄(如20):"))
    # Inclusive brackets that end up in the startage/endage query values.
    for startage, endage in ((21, 30), (31, 40), (41, 50)):
        if startage <= age <= endage:
            return startage, endage
    # An age outside every bracket (including the prompt's own example, 20)
    # used to leave both names unbound and crash with UnboundLocalError.
    # Fall back to (0, 0), the same out-of-range value quary_height returns.
    # NOTE(review): assumes the API treats 0/0 as "no age filter" - confirm.
    return 0, 0

def quary_sex():
    """Prompt for the desired gender and return its numeric code.

    Returns:
        1 when the answer is exactly "男"; 2 for any other answer.
    """
    answer = input("请输入期望对象的性别(如女):")
    return 1 if answer == "男" else 2

def quary_height():
    """Prompt for the desired height and map it to a ten-unit bracket.

    Returns:
        A ``(startheight, endheight)`` tuple: (151, 160), (161, 170),
        (171, 180) or (181, 190) when the entered height lies in one of
        those brackets, otherwise (0, 0).

    Raises:
        ValueError: if the typed text is not an integer.
    """
    wanted = int(input("请输入期望对象身高(如163):"))
    # Each bracket starts at `lower` and covers the ten integers up to lower + 9.
    for lower in (151, 161, 171, 181):
        if lower <= wanted < lower + 10:
            return lower, lower + 9
    return 0, 0

def quary_salary():
    """Prompt for the desired salary and return the matching salary code.

    Returns:
        0 below 2000, 2 for 2000-4999, 3 for 5000-9999, 4 for 10000-19999
        and 5 for 20000 or more.

    Raises:
        ValueError: if the typed text is not an integer.
    """
    amount = int(input("请输入期望对象薪资(如2000):"))
    # Guard clauses ordered by ascending upper bound; the first match wins.
    if amount < 2000:
        return 0
    if amount < 5000:
        return 2
    if amount < 10000:
        return 3
    if amount < 20000:
        return 4
    return 5

# Query the data that matches the entered criteria
def quary_data():
    """Collect the search criteria interactively, then fetch and save results.

    Asks for age, gender, height and salary, requests result pages 1 to 10
    through get_one, and for every returned user stores the avatar
    (save_image) and the profile text (save_info).
    """
    print ("请输入你的筛选条件,开始本次姻缘:")
    startage, endage = quary_age()  # age bracket
    gender = quary_sex()  # gender code
    startheight, endheight = quary_height()  # height bracket
    salary = quary_salary()  # salary code

    for page in range(1, 11):
        payload = get_one(startage, endage, gender, startheight, endheight, salary, page)
        # get_one returns None when the request fails; indexing None used to
        # abort the whole run with TypeError. Skip pages that carry no
        # usable 'data' -> 'list' structure instead.
        data = payload.get('data') if isinstance(payload, dict) else None
        users = data.get('list') if isinstance(data, dict) else None
        for item in users or []:
            save_image(item)  # save the avatar
            save_info(item)  # save the profile text


def save_image(item):
    """Download one user's avatar into ``image/<username>.jpg``.

    Args:
        item: mapping that provides at least 'username' and 'avatar'
            (the avatar URL).

    A file that already exists is left untouched and no request is made.
    A response whose status is not 200 is skipped without writing a file.
    """
    os.makedirs('image', exist_ok=True)
    file_path = 'image/{}.jpg'.format(item['username'])
    # Check for an existing file BEFORE downloading. The original fetched the
    # image first and printed the "already saved" message on the wrong branch
    # (non-200 status) instead of when the file was already present.
    if os.path.exists(file_path):
        print("已经保存过当前信息")
        return
    response = requests.get(item['avatar'])
    if response.status_code == 200:
        print("正在获取%s的信息"%item['username'])
        with open(file_path,'wb') as f:
            f.write(response.content)  # .content holds the raw image bytes

def save_info(item):
    """Write one user's profile to ``info/<username>.txt`` as UTF-8 text.

    Args:
        item: mapping that provides 'username', 'city', 'height',
            'education', 'monolog' and 'birthdayyear'.

    An existing file for the same username is overwritten.
    """
    os.makedirs('info', exist_ok=True)
    # str.format stringifies every field, so numeric values (for example an
    # integer height or birth year) no longer raise TypeError the way the
    # original '+' concatenation did.
    text = '名字:{},城市:{},身高:{},学历:{},个人签名:{},出生年份:{}'.format(
        item['username'],
        item['city'],
        item['height'],
        item['education'],
        item['monolog'],
        item['birthdayyear'],
    )
    with open('info/{}.txt'.format(item['username']), 'w', encoding='utf-8') as f:
        f.write(text)

# Take the search parameters and return the decoded JSON data
def get_one(startage, endage,gender,startheight, endheight,salary,page):
    """Request one page of search results and return the decoded JSON.

    Args:
        startage, endage: age bracket bounds placed in the query string.
        gender: gender code placed in the query string.
        startheight, endheight: height bracket bounds placed in the query string.
        salary: salary code placed in the query string.
        page: result page number.

    Returns:
        The decoded JSON body of a 200 response, or None when the request
        raises, the body is not valid JSON, or no attempt returns status 200.
    """
    headers = {'Referer':'http://www.lovewzly.com/jiaoyou.html',
               'User-Agent':'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:45.0) Gecko/20100101 Firefox/61.0'}
    base_url = 'http://www.lovewzly.com/api/user/pc/list/search?startage={}&endage={}&gender={}&cityid=60&startheight={}&endheight={}&marry=1&salary={}&page={}'.format(startage,endage,gender,startheight,endheight,salary,page)
    # The original `while True` retried forever, with no pause, whenever the
    # server kept answering with a non-200 status; bound the retries instead.
    max_attempts = 3
    for _ in range(max_attempts):
        try:
            # `headers` must be passed by keyword: the second positional
            # argument of requests.get is `params`, so the original call sent
            # Referer/User-Agent as query parameters, not as HTTP headers.
            # The timeout keeps a stalled connection from hanging the script.
            response = requests.get(base_url, headers=headers, timeout=10)
            if response.status_code == 200:
                return response.json()  # response.text would need manual decoding
        except (requests.RequestException, ValueError):
            # Network failure or undecodable body: give up on this page.
            return None
    return None

# Start the interactive search-and-download flow as soon as this module runs.
quary_data()


#http://www.lovewzly.com/api/user/pc/list/search?
# startage=21&endage=30&gender=2&cityid=60&startheight=151&endheight=160&marry=1&salary=2&page=1

效果如下:

说明:条件设置有很多,像其他的如星座、学历、生肖我都没设定,如果需要,可以自己添加,城市我默认搜厦门,如果想搜其他城市,可以F12自己查看其他城市的value,自己修改。

猜你喜欢

转载自blog.csdn.net/wzyaiwl/article/details/81483342
今日推荐