java爬取我主良缘girls

本篇文章主要介绍如何用java爬取我主良缘网站上女孩的信息。程序用URL访问网站接口,用jxl把爬取到的信息存储到excel表中,jxl的应用请参考另一篇文章《java 中JXL操作Excel实例详解》;爬取信息时获取到的是json格式的数据,json的解析及应用请参考另一篇文章(原文此处链接缺失)。

爬取程序如下:

package com.lzj.spider;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import jxl.Workbook;
import jxl.write.Label;
import jxl.write.WritableSheet;
import jxl.write.WritableWorkbook;
import jxl.write.WriteException;
import jxl.write.biff.RowsExceededException;

/**
 * Entry point of the crawler.
 *
 * <p>Requests the lovewzly.com search API one page at a time, parses each
 * response with {@link MyService#execute}, and files every returned profile
 * into one of four workbooks chosen by birth year. For each profile the avatar
 * is also downloaded into the directory that belongs to the same bucket.
 */
public class App 
{

    /** One workbook per age bucket, filled by {@link #createExcel()}. */
    static WritableWorkbook[] books = new WritableWorkbook[4];
    /** The single sheet of each workbook, index-aligned with {@link #books}. */
    static WritableSheet[] sheets = new WritableSheet[4];

    /** Output workbook of each bucket, index-aligned with {@link #books}. */
    private static final String[] WORKBOOK_FILES = {
        "E:/MyData/20岁以下少女.xls",
        "E:/MyData/20-30岁青年女孩.xls",
        "E:/MyData/30-40岁中年女人.xls",
        "E:/MyData/50岁以上中老年妇女.xls"
    };

    /**
     * Avatar directory of each bucket, index-aligned with {@link #books}.
     * NOTE(review): the last bucket receives everyone born before 1978 (older
     * than 40) although its directory and workbook names say "50"; the names
     * are kept unchanged so existing output locations stay valid.
     */
    private static final String[] PICTURE_DIRS = {
        "E:/MyData/picture/小于20岁",
        "E:/MyData/picture/20-30岁",
        "E:/MyData/picture/30-40岁",
        "E:/MyData/picture/大于50岁"
    };

    /** Header row written to every sheet; the array index is the column index. */
    private static final String[] HEADERS = {
        "userid", "username", "gender", "education", "height",
        "birthdayyear", "province", "city", "monolog"
    };

    /**
     * Crawls all result pages until the service returns no more profiles.
     *
     * <p>Any exception ends the crawl; whatever was collected so far is still
     * written to disk in the {@code finally} block.
     *
     * @param args unused
     */
    public static void main( String[] args )
    {
        String gender = "2";  /* female */
        /* next free row per bucket; row 0 holds the header */
        int[] nextRow = {1, 1, 1, 1};

        try {
            createExcel();

            for (int page = 0; ; page++) {
                List<Person> persons = fetchPage(page, gender);
                if (persons == null || persons.isEmpty()) {
                    System.out.println("over");
                    break;
                }
                for (Person person : persons) {
                    String birthYear = person.getBirthdayyear();
                    if (birthYear == null || birthYear.trim().isEmpty()) {
                        /* skip only this profile, not the rest of the page */
                        continue;
                    }
                    int bucket = bucketFor(birthYear);
                    MyService.writeExcel(books[bucket], sheets[bucket], nextRow[bucket], person);
                    MyService.getPicture(person, PICTURE_DIRS[bucket]);
                    nextRow[bucket]++;
                }
            }
        } catch (Exception e) {
            System.out.println("over");
            e.printStackTrace();
        } finally {
            System.out.println("hello finally");
            saveWorkbooks();
        }

    }

    /**
     * Downloads and parses one result page. The HTTP stream is closed before
     * returning, so no connection is left open between pages.
     *
     * @param page   zero-based page number
     * @param gender gender filter sent to the API
     * @return the parsed profiles, or whatever {@link MyService#execute} returns for an empty page
     * @throws IOException if the URL cannot be opened or read
     */
    private static List<Person> fetchPage(int page, String gender) throws IOException {
        String condition = "page=" + page + "&gender=" + gender;
        URL url = new URL("http://www.lovewzly.com/api/user/pc/list/search?" + condition);
        try (InputStream in = url.openStream();
             BufferedReader reader = new BufferedReader(new InputStreamReader(in, "utf-8"))) {
            return MyService.execute(reader);
        }
    }

    /**
     * Maps a birth year to its bucket index. Years are compared as strings,
     * which orders four-digit years correctly. Every lower bound is inclusive,
     * so 1998, 1988 and 1978 each open their own bucket.
     *
     * @param birthYear non-blank birth year
     * @return 0 (1998 or later), 1 (1988-1997), 2 (1978-1987) or 3 (earlier)
     */
    private static int bucketFor(String birthYear) {
        String year = birthYear.trim();
        if (year.compareTo("1998") >= 0) {
            return 0;
        }
        if (year.compareTo("1988") >= 0) {
            return 1;
        }
        if (year.compareTo("1978") >= 0) {
            return 2;
        }
        return 3;
    }

    /**
     * Writes and closes every workbook that was created. Each workbook is
     * handled independently so one failure does not lose the others.
     */
    private static void saveWorkbooks() {
        for (WritableWorkbook book : books) {
            if (book == null) {
                /* createExcel() failed before reaching this slot */
                continue;
            }
            try {
                try {
                    book.write();
                } finally {
                    book.close();
                }
            } catch (Exception e2) {
                e2.printStackTrace();
            }
        }
    }

    /**
     * Creates the four workbooks, one sheet in each, and writes the header row.
     *
     * @throws IOException           if a workbook file cannot be created
     * @throws RowsExceededException if a sheet rejects the header row
     * @throws WriteException        if a header cell cannot be added
     */
    public static void createExcel() throws IOException, RowsExceededException, WriteException{
        for (int i = 0; i < books.length; i++) {
            books[i] = Workbook.createWorkbook(new File(WORKBOOK_FILES[i]));
            sheets[i] = books[i].createSheet("第一页", 0);
            for (int col = 0; col < HEADERS.length; col++) {
                sheets[i].addCell(new Label(col, 0, HEADERS[col]));
            }
        }
    }
}
package com.lzj.spider;
public class Person {
    private String userid;
    private String username;
    private String gender;      /*1:male    2:female*/
    private String education;
    private String height;
    private String birthdayyear;
    private String province;
    private String city;
    private String monolog;
    private String avatar;      /*picture*/
    /*省略get/set/toString方法*/
}
package com.lzj.spider;
import java.awt.image.BufferedImage;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.imageio.ImageIO;
import javax.imageio.stream.FileCacheImageInputStream;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import jxl.Workbook;
import jxl.write.Label;
import jxl.write.WritableSheet;
import jxl.write.WritableWorkbook;
import jxl.write.WriteException;
import jxl.write.biff.RowsExceededException;

/**
 * Helper routines of the crawler: JSON parsing, Excel output and avatar
 * download. All methods are static and stateless.
 */
public class MyService {

    /** Header row of every sheet; the array index is the column index. */
    private static final String[] HEADERS = {
        "userid", "username", "gender", "education", "height",
        "birthdayyear", "province", "city", "monolog"
    };

    /**
     * Parses one API response into a list of persons.
     *
     * <p>The response is read as an object with a {@code data} member that
     * carries a {@code num} count and a {@code list} array.
     *
     * @param reader source of the JSON document
     * @return the parsed persons, or {@code null} when {@code num} is 0 or the
     *         response does not have the shape described above (callers treat
     *         {@code null} as "no more data")
     */
    public static List<Person> execute(Reader reader){
        Gson gson = new GsonBuilder().create();
        JsonElement root = new JsonParser().parse(reader);
        if (root == null || !root.isJsonObject()) {
            return null;
        }
        JsonElement data = root.getAsJsonObject().get("data");
        if (data == null || !data.isJsonObject()) {
            return null;
        }
        JsonObject jsonObjectData = data.getAsJsonObject();
        JsonElement num = jsonObjectData.get("num");
        if (num == null || num.isJsonNull() || num.getAsInt() == 0) {
            return null;
        }
        JsonElement list = jsonObjectData.get("list");
        if (list == null || !list.isJsonArray()) {
            return null;
        }
        List<Person> persons = new ArrayList<Person>();
        for (JsonElement element : list.getAsJsonArray()) {
            persons.add(gson.fromJson(element, Person.class));
        }
        return persons;
    }

    /**
     * Creates the four age-bucket workbooks, one sheet in each, and writes the
     * header row.
     *
     * @return a map with key {@code "book"} holding the {@code WritableWorkbook[]}
     *         and key {@code "sheet"} holding the {@code WritableSheet[]}
     * @throws IOException           if a workbook file cannot be created
     * @throws RowsExceededException if a sheet rejects the header row
     * @throws WriteException        if a header cell cannot be added
     */
    public static Map<String, Object> createExcel() throws IOException, RowsExceededException, WriteException{
        String[] files = {
            "E:/MyData/20岁以下少女.xls",
            "E:/MyData/20-30岁青年女孩.xls",
            "E:/MyData/30-40岁中年女人.xls",
            "E:/MyData/50岁以上中老年妇女.xls"
        };
        WritableWorkbook[] book = new WritableWorkbook[files.length];
        WritableSheet[] sheet = new WritableSheet[files.length];
        for (int i = 0; i < files.length; i++) {
            book[i] = Workbook.createWorkbook(new File(files[i]));
            sheet[i] = book[i].createSheet("第一页", 0);
            for (int col = 0; col < HEADERS.length; col++) {
                sheet[i].addCell(new Label(col, 0, HEADERS[col]));
            }
        }
        Map<String, Object> excels = new HashMap<String, Object>();
        excels.put("book", book);
        excels.put("sheet", sheet);
        return excels;
    }


    /**
     * Writes one person into the given row of the sheet, one field per column
     * in the order of the header row.
     *
     * @param book   workbook owning the sheet (not used by this method)
     * @param sheet  sheet to write to
     * @param row    zero-based target row
     * @param person data to write
     * @throws RowsExceededException if the sheet cannot take another row
     * @throws WriteException        if a cell cannot be added
     * @throws IOException           declared for callers; not thrown by this body
     */
    public static void writeExcel(WritableWorkbook book, WritableSheet sheet, Integer row, Person person) throws RowsExceededException, WriteException, IOException{
        System.out.println("&&&&: " + row);
        System.out.println("person : " + person);
        String[] values = {
            person.getUserid(),
            person.getUsername(),
            person.getGender(),
            person.getEducation(),
            person.getHeight(),
            person.getBirthdayyear(),
            person.getProvince(),
            person.getCity(),
            person.getMonolog()
        };
        int targetRow = row;
        for (int col = 0; col < values.length; col++) {
            sheet.addCell(new Label(col, targetRow, values[col]));
        }
    }

    /**
     * Downloads the person's avatar by decoding it with ImageIO and
     * re-encoding it as JPEG. Failures are reported and otherwise ignored so
     * that one bad picture does not stop the crawl.
     *
     * @param person person whose avatar URL is read
     * @param path   directory receiving the picture; created if missing
     */
    public static void getPicture(Person person, String path){
        String urlString = person.getAvatar();
        if (urlString == null || urlString.trim().isEmpty()) {
            return;
        }
        try {
            BufferedImage img = ImageIO.read(new URL(urlString.trim()));
            if (img == null) {
                /* ImageIO.read returns null when no reader understands the data */
                System.err.println("unsupported image format: " + urlString);
                return;
            }
            File target = pictureFile(person, path);
            if (!ImageIO.write(img, "jpg", target)) {
                /* ImageIO.write returns false when no writer accepts the image */
                System.err.println("cannot encode as jpg: " + urlString);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } 
    }

    /**
     * Downloads the person's avatar by copying the raw response bytes to disk
     * without decoding them. Failures are reported and otherwise ignored.
     *
     * @param person person whose avatar URL is read
     * @param path   directory receiving the picture; created if missing
     */
    public static void getPicture2(Person person, String path){
        String urlString = person.getAvatar();
        if (urlString == null || urlString.trim().isEmpty()) {
            return;
        }
        HttpURLConnection connection = null;
        try {
            connection = (HttpURLConnection) new URL(urlString.trim()).openConnection();
            connection.connect();
            File target = pictureFile(person, path);
            /* both streams are closed even when the copy fails half-way */
            try (InputStream in = new BufferedInputStream(connection.getInputStream());
                 OutputStream out = new FileOutputStream(target)) {
                byte[] buf = new byte[1024];
                int len;
                while ((len = in.read(buf)) != -1) {
                    out.write(buf, 0, len);
                }
                out.flush();
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /**
     * Builds the target file {@code <userid>-<username>-<birthdayyear>.jpg}
     * inside {@code path}. Characters that are not allowed in file names are
     * replaced by '_' because user names are free text.
     */
    private static File pictureFile(Person person, String path) {
        String name = person.getUserid() + "-" + person.getUsername() + "-" + person.getBirthdayyear() + ".jpg";
        name = name.replaceAll("[\\\\/:*?\"<>|\\p{Cntrl}]", "_");
        File dir = new File(path);
        dir.mkdirs();
        return new File(dir, name);
    }

}

猜你喜欢

转载自blog.csdn.net/u010502101/article/details/80719210
今日推荐