Java生成千万级别MySQL测试数据

概述

本实验目的是尝试选择一种最优方式向MySQL数据库插入5000W条随机测试数据。以下内容包括JDBC、Mybatis和Spring JDBCTemplate各种插入方式的效率分析、参数调整以及超大数据量插入的解决办法。

JDBC各种插入方式比较

我们知道,JDBC将数据插入数据库有多种方式,包括常用的单条记录插入、多Values拼接SQL字符串和批量插入的方式,在此基础上还可以增加事务提交和预编译的策略来提高插入效率。下面以10W(100000条)数据为例,分别测试单条记录、多Values拼接SQL字符串、批量插入、多Values拼接SQL字符串+事务提交、批量插入+事务提交形式的执行效率。

单条记录插入
Connection conn = null;
PreparedStatement pstm =null;
ResultSet rt = null;
try {
    conn = DBConnection.getConnection();
    String sql = "INSERT INTO student2(c_id,stu_name,stu_number,stu_phone_number,c_name,stu_create_time,status) VALUES(?,?,?,?,?,?,?)";
    pstm = conn.prepareStatement(sql);
    Long startTime = System.currentTimeMillis();
    int count = 0;
    System.out.println("开始插入...");
    Student student = null;
    for (int i = 0; i < 100000; i++){
        pstm.setLong(1, i);
        pstm.setString(2,String.valueOf(i));
        pstm.setString(3, String.valueOf(i));
        pstm.setString(4, String.valueOf(i));
        pstm.setString(5, String.valueOf(i));
        pstm.setString(6, new java.util.Date().toLocaleString());
        pstm.setBoolean(7, true);
        pstm.execute();
    }

    Long endTime = System.currentTimeMillis();
    System.out.println("插入完成,总用时:" + (endTime - startTime)+"ms");
} catch (Exception e) {
    e.printStackTrace();
    throw new RuntimeException(e);
}finally{
    if(pstm!=null){
        try {
            pstm.close();
        } catch (SQLException e) {
            e.printStackTrace();
            throw new RuntimeException(e);
        }
    }
    if(conn!=null){
        try {
            conn.close();
        } catch (SQLException e) {
            e.printStackTrace();
            throw new RuntimeException(e);
        }
    }
}

插入条数:100000条
用时:155742ms、157012ms、156323ms
多Values形式
// Multi-VALUES benchmark: concatenate up to 25000 value tuples into one INSERT statement per round trip.
final String insertPrefix = "INSERT INTO student(c_id,stu_name,stu_number,stu_phone_number,c_name,stu_create_time,status) VALUES";
final int rowsPerStatement = 25000;
try (Connection conn = DBConnection.getConnection()) {
    StringBuilder sql = new StringBuilder(insertPrefix);
    long startTime = System.currentTimeMillis();
    System.out.println("开始插入");
    int count = 0;
    for (int i = 0; i < 100000; i++) {
        if (count != 0) {
            sql.append(",");
        }
        count++;
        // java.sql.Timestamp prints as "yyyy-mm-dd hh:mm:ss.fff", independent of the JVM locale
        // (Date.toLocaleString() is deprecated and its output varies by locale).
        sql.append("(").append(i)
           .append(",'").append(i)
           .append("','").append(i)
           .append("','").append(i)
           .append("','").append(i)
           .append("','").append(new java.sql.Timestamp(System.currentTimeMillis()))
           .append("',").append(true).append(")");
        if (count == rowsPerStatement) {
            // Close each chunk's statement right away; the original overwrote the reference and leaked it.
            try (PreparedStatement pstm = conn.prepareStatement(sql.toString())) {
                pstm.execute();
            }
            count = 0;
            sql.setLength(insertPrefix.length());
        }
    }
    if (count > 0) {
        // Flush the rows left over when the total is not a multiple of rowsPerStatement.
        try (PreparedStatement pstm = conn.prepareStatement(sql.toString())) {
            pstm.execute();
        }
    }
    long endTime = System.currentTimeMillis();
    System.out.println("插入数据库总用时:" + (endTime - startTime)+"ms");
} catch (Exception e) {
    // Rethrow with the cause attached; the caller decides how to report it.
    throw new RuntimeException(e);
}

插入条数:100000
插入耗时:2092ms、1922ms、2012ms
批量插入
Connection conn = null;
PreparedStatement pstm =null;
try {
    conn = DBConnection.getConnection();
    String sql = "INSERT INTO student2(c_id,stu_name,stu_number,stu_phone_number,c_name,stu_create_time,status) VALUES(?,?,?,?,?,?,?)";
    pstm = conn.prepareStatement(sql);
    Long startTime = System.currentTimeMillis();
    int count =0;
    System.out.println("开始插入...");
    for(int i = 0;i<100000;i++){
            pstm.setLong(1,i);
            pstm.setString(2,String.valueOf(i));
            pstm.setString(3,String.valueOf(i));
            pstm.setString(4,String.valueOf(i));
            pstm.setString(5,String.valueOf(i));
            pstm.setString(6,new java.util.Date().toLocaleString());
            pstm.setBoolean(7,true);
            pstm.addBatch();
            count++;
            if(count>=GenerateRandomData.BATCH_SIZE) {
                //每GenerateRandomData.BATCH_SIZE条数据进行一次批量插入操作
                int[] ids = pstm.executeBatch();
                pstm.clearBatch();
                count = 0;
            }
    }
    Long endTime = System.currentTimeMillis();
    System.out.println("插入完成,总用时:" + (endTime - startTime)+"ms");
} catch (Exception e) {
    e.printStackTrace();
    throw new RuntimeException(e);
}finally{
    if(pstm!=null){
        try {
            pstm.close();
        } catch (SQLException e) {
            e.printStackTrace();
            throw new RuntimeException(e);
        }
    }
    if(conn!=null){
        try {
            conn.close();
        } catch (SQLException e) {
            e.printStackTrace();
            throw new RuntimeException(e);
        }
    }
}

插入条数:100000
插入耗时:135883ms、136233ms、135235ms
多Values拼接SQL字符串+事务
// Multi-VALUES + manual transaction: one INSERT with up to 25000 tuples, committed after each statement.
final String insertPrefix = "INSERT INTO student(c_id,stu_name,stu_number,stu_phone_number,c_name,stu_create_time,status) VALUES";
final int rowsPerStatement = 25000;
try (Connection conn = DBConnection.getConnection()) {
    // Manual transaction control.
    conn.setAutoCommit(false);
    try {
        StringBuilder sql = new StringBuilder(insertPrefix);
        long startTime = System.currentTimeMillis();
        System.out.println("开始插入");
        int count = 0;
        for (int i = 0; i < 100000; i++) {
            if (count != 0) {
                sql.append(",");
            }
            count++;
            // java.sql.Timestamp prints as "yyyy-mm-dd hh:mm:ss.fff", independent of the JVM locale
            // (Date.toLocaleString() is deprecated and its output varies by locale).
            sql.append("(").append(i)
               .append(",'").append(i)
               .append("','").append(i)
               .append("','").append(i)
               .append("','").append(i)
               .append("','").append(new java.sql.Timestamp(System.currentTimeMillis()))
               .append("',").append(true).append(")");
            if (count == rowsPerStatement) {
                // Close each chunk's statement right away; the original overwrote the reference and leaked it.
                try (PreparedStatement pstm = conn.prepareStatement(sql.toString())) {
                    pstm.execute();
                }
                conn.commit();
                count = 0;
                sql.setLength(insertPrefix.length());
            }
        }
        if (count > 0) {
            // Flush and commit the rows left over when the total is not a multiple of rowsPerStatement.
            try (PreparedStatement pstm = conn.prepareStatement(sql.toString())) {
                pstm.execute();
            }
            conn.commit();
        }
        long endTime = System.currentTimeMillis();
        // "ms" suffix added so the message matches the other benchmarks' output.
        System.out.println("插入数据库总用时:" + (endTime - startTime)+"ms");
    } catch (Exception e) {
        // Undo the uncommitted chunk before the connection is closed.
        try {
            conn.rollback();
        } catch (SQLException rollbackFailure) {
            e.addSuppressed(rollbackFailure);
        }
        throw e;
    }
} catch (Exception e) {
    // Rethrow with the cause attached; the caller decides how to report it.
    throw new RuntimeException(e);
}

插入条数:100000
插入耗时:2021ms、2025ms、1921ms
批量插入+事务提交
Connection conn = null;
PreparedStatement pstm =null;
try {
    conn = DBConnection.getConnection();
    String sql = "INSERT INTO student2(c_id,stu_name,stu_number,stu_phone_number,c_name,stu_create_time,status) VALUES(?,?,?,?,?,?,?)";
    pstm = conn.prepareStatement(sql);
    //手动事务
    conn.setAutoCommit(false);
    Long startTime = System.currentTimeMillis();
    int count =0;
    System.out.println("开始插入...");
    for(int i = 0;i<100000;i++){
            pstm.setLong(1,i);
            pstm.setString(2,String.valueOf(i));
            pstm.setString(3,String.valueOf(i));
            pstm.setString(4,String.valueOf(i));
            pstm.setString(5,String.valueOf(i));
            pstm.setString(6,new java.util.Date().toLocaleString());
            pstm.setBoolean(7,true);
            pstm.addBatch();
            count++;
            if(count>=25000) {
                //每25000条数据进行一次批量插入操作
                int[] ids = pstm.executeBatch();
                pstm.clearBatch();
                conn.commit();
                count = 0;
            }
    }
    Long endTime = System.currentTimeMillis();
    System.out.println("插入完成,总用时:" + (endTime - startTime)+"ms");
} catch (Exception e) {
    e.printStackTrace();
    throw new RuntimeException(e);
}finally{
    if(pstm!=null){
        try {
            pstm.close();
        } catch (SQLException e) {
            e.printStackTrace();
            throw new RuntimeException(e);
        }
    }
    if(conn!=null){
        try {
            conn.close();
        } catch (SQLException e) {
            e.printStackTrace();
            throw new RuntimeException(e);
        }
    }
}

插入条数:100000
插入耗时:8970ms、9051ms、8825ms
预编译+批量插入+事务提交
在批量插入+事务提交基础上修改JDBCUrl,添加useServerPrepStmts=true
    
插入条数:100000
插入耗时:8022ms、8058ms、7983ms

Mybatis插入方式比较

mybatis主要有三种插入方式:for循环insert、mybatis的insertBatch、批量foreach插入

for循环insert
// Baseline MyBatis benchmark: every generated student is written by its own studentDao.insert call.
long begin = System.currentTimeMillis();
int remaining = 100000;
while (remaining-- > 0) {
    Student row = new Student();
    // Random name: one last name plus one first name.
    row.setStuName(lastNames[randomInt(14)] + firstNames[randomInt(31)]);
    // Random student number: an enrollment-year prefix followed by getRandom(9).
    row.setStuNumber(stuNumberStarts[randomInt(5)] + getRandom(9));
    // Pick one college; id and name come from the same index so they stay paired.
    int collegeIndex = randomInt(6);
    row.setcId(collegeIDs[collegeIndex]);
    row.setcName(collegeNames[collegeIndex]);
    // Status is false only when randomInt(1000) returns 0, i.e. 0.1% of the time.
    row.setStatus(randomInt(1000) > 0);
    // Random phone number: a prefix followed by getRandom(8).
    row.setStuPhoneNumber(phoneNumberStarts[randomInt(7)] + getRandom(8));
    row.setStuCreateTime(new Date());

    studentDao.insert(row);
}
long finish = System.currentTimeMillis();

System.out.println("用时:" + (finish - begin));

插入条数:100000条
插入耗时:181488ms、181001ms、192105ms
mybatis的insertBatch
SqlSessionTemplate sqlSessionTemplate = new SqlSessionTemplate(sqlSessionFactory);
SqlSession sqlSession = sqlSessionTemplate.getSqlSessionFactory().openSession(ExecutorType.BATCH,false);
Long start = System.currentTimeMillis();
for(int i = 0;i<100000;i++){
    //随机生成姓名
    Student student = new Student();
    student.setStuName(lastNames[randomInt(14)]+firstNames[randomInt(31)]);
    //随机生成学号
    student.setStuNumber(stuNumberStarts[randomInt(5)]+getRandom(9));
    //随机生成学院ID与名称
    int randomInt = randomInt(6);
    student.setcId(collegeIDs[randomInt]);
    student.setcName(collegeNames[randomInt]);
    //随机生成状态,false概率为0.1%
    Boolean flag = randomInt(1000)>0?true:false;
    student.setStatus(flag);
    //随机生成手机号
    student.setStuPhoneNumber(phoneNumberStarts[randomInt(7)]+getRandom(8));
    student.setStuCreateTime(new Date());

    studentDao.insert(student);
}
sqlSession.commit();
Long end = System.currentTimeMillis();
System.out.println("用时:"+(end-start));

插入条数:100000条
插入耗时:162081ms、160213ms、158989ms
批量foreach插入
// MyBatis foreach benchmark: collect all 100000 students, then write them with a single insertBatch call.
long begin = System.currentTimeMillis();
for (int generated = 0; generated != 100000; generated++) {
    Student row = new Student();
    // Random name: one last name plus one first name.
    row.setStuName(lastNames[randomInt(14)] + firstNames[randomInt(31)]);
    // Random student number: an enrollment-year prefix followed by getRandom(9).
    row.setStuNumber(stuNumberStarts[randomInt(5)] + getRandom(9));
    // Pick one college; id and name come from the same index so they stay paired.
    int collegeIndex = randomInt(6);
    row.setcId(collegeIDs[collegeIndex]);
    row.setcName(collegeNames[collegeIndex]);
    // Status is false only when randomInt(1000) returns 0, i.e. 0.1% of the time.
    row.setStatus(randomInt(1000) > 0);
    // Random phone number: a prefix followed by getRandom(8).
    row.setStuPhoneNumber(phoneNumberStarts[randomInt(7)] + getRandom(8));
    row.setStuCreateTime(new Date());

    students.add(row);
}
// One mapper call carries the whole list.
studentDao.insertBatch(students);

long finish = System.currentTimeMillis();

System.out.println("用时:" + (finish - begin));

插入条数:100000条
插入耗时:2898ms、2697ms、2813ms

Spring JDBCTemplate

// Spring JdbcTemplate benchmark: build one Object[] of bind values per row, then call batchUpdate once.
List<Object[]> rows = new ArrayList<>();
long begin = System.currentTimeMillis();
String insertSql = " insert into student(c_id, stu_name, stu_number,c_name, stu_phone_number, stu_create_time,status) values(?,?,?,?,?,?,?)";
int generated = 0;
while (generated < 100000) {
    // Pick one college; id and name come from the same index so they stay paired.
    int collegeIndex = randomInt(6);
    // Values are listed in the same order as the columns in insertSql.
    Object[] row = {
            collegeIDs[collegeIndex],
            lastNames[randomInt(14)] + firstNames[randomInt(31)],
            stuNumberStarts[randomInt(5)] + getRandom(9),
            collegeNames[collegeIndex],
            phoneNumberStarts[randomInt(7)] + getRandom(8),
            new Date(),
            // false only when randomInt(1000) returns 0
            randomInt(1000) > 0
    };
    rows.add(row);
    generated++;
}

jdbcTemplate.batchUpdate(insertSql, rows);
long finish = System.currentTimeMillis();

System.out.println("用时:" + (finish - begin));

插入条数:100000条
插入耗时:142744ms、145688ms、140214ms

多种方式比较

插入方式 第一次结果 第二次结果 第三次结果 平均值
JDBC for循环insert 155742ms 157012ms 156323ms 156359ms
JDBC多Values 2092ms 1922ms 2012ms 2009ms
JDBC批量插入 135883ms 136233ms 135235ms 135784ms
JDBC多Values+事务提交 2021ms 2025ms 1921ms 1989ms
JDBC批量插入+事务提交 8970ms 9051ms 8825ms 8949ms
JDBC预编译+批量插入+事务提交 8022ms 8058ms 7983ms 8021ms
Mybatis for循环insert 181488ms 181001ms 192105ms 184865ms
Mybatis的insertBatch 162081ms 160213ms 158989ms 160428ms
批量foreach插入 2898ms 2697ms 2813ms 2803ms
Spring JDBCTemplate 142744ms 145688ms 140214ms 142882ms

综上结果分析得出:

  1. 大数据量插入时,批量插入方式要优于单条插入
  2. 手动事务提交可以大幅度提高数据写入速度
  3. 预编译也可以提升数据库插入速度
  4. Mybatis 批量foreach插入与多values的方式在10W数据量时测试结果明显优于其他方式,最佳为多Values形式。

虽然本次实验结果显示多Values形式的写入方式优势明显,但是在本机测试时,当数据量达到800W左右时将会出现OOM异常。

生成5000W随机测试数据

通过上面的比较,最终选择的方案是使用Mybatis 批量foreach插入的方式来实现此功能。

生成数据操作
请添加图片描述
在这里插入图片描述

直接上代码

properties配置文件

# College names (comma-separated); read by CollegeServiceImpl via ${collection.collegeNames}
collection.collegeNames = 信息学院,美术学院,机械学院,建筑学院,物流学院,外国语学院,马克思学院,历史学院,统计学院,化工学院,环境学院,地理学院,物理学院,教育学院
# Teaching-building names used as the college address
collection.address = 弘毅楼,致远楼,明义楼,锐思楼
# Student last names
student.lastNames = 张,王,李,赵,刘,胡,陈,苗,戴,习,毛,朱,韩,陆,孙
# Student first names
student.firstNames = 兰,兰兰,明,明明,红,楠,中,庆,滔,人,同,娟,娟娟,丽,美丽,利,陆,空间,建国,国庆,狗蛋,狗剩,营,丽雅,长城,尚,飞,果,娜娜,亚楠,岁月
# First three digits of generated phone numbers
phoneNumberStarts = 133,150,153,178,177,180,181,189,187,130,131,132
# First four characters of a student number: the enrollment year
student.stuNumberStarts = 2017,2018,2016,2019,2020
# Number of students to generate
student.size = 50000000
学院数据生成

xml

<!-- Insert one college row. useGeneratedKeys="true" with keyProperty="cId" asks MyBatis to write the
     database-generated key back into the parameter object's cId property. -->
<insert id="insert" keyProperty="cId" useGeneratedKeys="true">
    insert into college(c_name, c_address, c_phone_number, c_create_time)
    values (#{cName}, #{cAddress}, #{cPhoneNumber}, #{cCreateTime})
</insert>

dao接口

/**
 * Inserts one college through the mapper statement with id {@code insert}.
 *
 * @param college the college to persist
 * @return the value returned by the mapped insert statement
 *         (presumably the number of affected rows; confirm against the mapper)
 */
int insert(College college);

serviceImpl

package com.zsl.codeClub.service.impl;

import com.github.pagehelper.PageHelper;
import com.github.pagehelper.PageInfo;
import com.zsl.codeClub.dao.StudentDao;
import com.zsl.codeClub.entity.College;
import com.zsl.codeClub.dao.CollegeDao;
import com.zsl.codeClub.service.CollegeService;
import com.zsl.codeClub.utils.JsonResult;
import com.zsl.codeClub.utils.PageUtils;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;

import javax.annotation.Resource;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Random;

/**
 * College service: seeds the college table with one row per configured college name,
 * filling the remaining fields with random test values.
 *
 * @author sl.zhang
 * @since 2020-07-24 09:55:49
 */
@Service("collegeService")
public class CollegeServiceImpl implements CollegeService {
    /** Shared generator, so a new Random is not allocated for every random value. */
    private static final Random RANDOM = new Random();

    @Resource
    private CollegeDao collegeDao;

    // College names
    @Value("${collection.collegeNames}")
    private String[] collegeNames;
    // Addresses (teaching buildings)
    @Value("${collection.address}")
    private String[] address;
    // First three digits of generated phone numbers
    @Value("${phoneNumberStarts}")
    private String[] phoneNumberStarts;

    /**
     * Inserts one college per entry of {@code collegeNames}, one {@code collegeDao.insert} call each.
     *
     * @return a result carrying the elapsed milliseconds under "time" and the summed
     *         return values of {@code collegeDao.insert} under "size"
     */
    @Override
    public JsonResult insertBatch() {
        int count = 0;
        long start = System.currentTimeMillis();
        for (String collegeName : collegeNames) {
            College college = new College();
            college.setcName(collegeName);
            // Status is false only when randomInt(1000) returns 0, i.e. 0.1% of the time.
            // NOTE(review): the insert mapping shown in this article has no status column,
            // so this value may never reach the database; confirm against the real mapper.
            college.setStatus(randomInt(1000) > 0);
            // Random phone number: a configured prefix followed by 8 random digits.
            college.setcPhoneNumber(phoneNumberStarts[randomInt(phoneNumberStarts.length)] + getRandom(8));
            college.setcAddress(address[randomInt(address.length)]);
            college.setcCreateTime(new Date());
            count += collegeDao.insert(college);
        }
        long end = System.currentTimeMillis();
        return JsonResult.ok("time", end - start).put("size", count);
    }

    /**
     * Returns a random integer in the range [0, n-1].
     *
     * @param n exclusive upper bound; must be positive
     * @return a value between 0 (inclusive) and {@code n} (exclusive)
     * @throws IllegalArgumentException if {@code n} is not positive (thrown by {@link Random#nextInt(int)})
     */
    public static int randomInt(int n) {
        return RANDOM.nextInt(n);
    }

    /**
     * Generates a random string of decimal digits.
     *
     * <p>Each digit is drawn independently, so digits may repeat and leading zeros are kept.
     * Building the text directly (rather than through an {@code int}) avoids overflow for
     * 10 digits and lifts the previous 10-digit limit.
     *
     * @param count number of digits to generate
     * @return a string of exactly {@code count} digits, or an empty string when {@code count <= 0}
     */
    public String getRandom(int count) {
        if (count <= 0) {
            return "";
        }
        StringBuilder digits = new StringBuilder(count);
        for (int i = 0; i < count; i++) {
            digits.append(RANDOM.nextInt(10));
        }
        return digits.toString();
    }
}
学生数据生成

xml

<!-- Batch insert: renders a single multi-row INSERT with one value tuple per element of the
     "list" parameter, separated by commas. The list must be non-empty, otherwise no tuple is
     rendered after "values". -->
<insert id="insertBatch">
    insert into student(c_id, stu_name, stu_number,c_name, stu_phone_number, stu_create_time,status)
    values
    <foreach collection="list" item="item" separator=",">
        (#{item.cId},#{item.stuName},#{item.stuNumber},#{item.cName},#{item.stuPhoneNumber},#{item.stuCreateTime},#{item.status})
    </foreach>
</insert>

<!-- Select the largest stu_number in the student table, returned as a string -->
<select id="selectMaxStuNumber" resultType="string">
    select max(stu_number) from student;
</select>

dao接口

/**
 * Inserts all given students through the mapper statement with id {@code insertBatch},
 * which renders them as one multi-row INSERT.
 *
 * @param students the students to insert
 * @return the value returned by the mapped insert statement
 *         (presumably the number of inserted rows; confirm against the mapper)
 */
int insertBatch(List<Student> students);

/**
 * Fetches the largest student number through the mapper statement {@code selectMaxStuNumber}.
 *
 * @return the result of {@code max(stu_number)} as a string; callers in this article
 *         treat {@code null} as "no student yet"
 */
String selectMaxStuNumber();

serviceImpl

package com.zsl.codeClub.service.impl;

import com.github.pagehelper.PageHelper;
import com.github.pagehelper.PageInfo;
import com.zsl.codeClub.dao.CollegeDao;
import com.zsl.codeClub.entity.College;
import com.zsl.codeClub.entity.Student;
import com.zsl.codeClub.dao.StudentDao;
import com.zsl.codeClub.service.StudentService;
import com.zsl.codeClub.utils.JsonResult;
import com.zsl.codeClub.utils.PageUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;

import javax.annotation.Resource;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Random;

/**
 * Student service: generates the configured number of random students and writes them
 * to the database in fixed-size batches.
 *
 * @author sl.zhang
 * @since 2020-07-24 10:22:31
 */
@Service("studentService")
public class StudentServiceImpl implements StudentService {
    /** Number of students sent to the database per insertBatch call. */
    private static final int BATCH_SIZE = 25000;
    /** Width the sequence part of a student number is zero-padded to. */
    private static final int STU_NUMBER_DIGITS = 10;
    /** Shared generator, so a new Random is not allocated for every random value. */
    private static final Random RANDOM = new Random();

    @Resource
    private StudentDao studentDao;
    @Resource
    private CollegeDao collegeDao;

    // Names
    @Value("${student.lastNames}")
    private String[] lastNames;
    @Value("${student.firstNames}")
    private String[] firstNames;
    // First three digits of generated phone numbers
    @Value("${phoneNumberStarts}")
    private String[] phoneNumberStarts;
    // First four characters of a student number: the enrollment year
    @Value("${student.stuNumberStarts}")
    private String[] stuNumberStarts;
    // Number of students to generate
    @Value("${student.size}")
    private Integer size;

    /**
     * Generates {@code size} random students and inserts them in batches of {@link #BATCH_SIZE}.
     *
     * @return an error result (code 400) when no college exists; otherwise a result carrying
     *         the elapsed milliseconds under "time" and the summed {@code insertBatch}
     *         return values under "size"
     */
    @Override
    public JsonResult generate() {
        long start = System.currentTimeMillis();
        // Load the colleges that students are assigned to.
        List<College> colleges = collegeDao.queryAll(null);
        if (colleges == null || colleges.isEmpty()) {
            return JsonResult.error(400,"未添加学院信息");
        }

        // Continue numbering after the largest existing student number.
        // NOTE(review): stored numbers are "year prefix + sequence", so parsing the whole string
        // folds the year prefix into the counter and a rerun continues from a much larger value;
        // confirm this is intended.
        String stuNumber = studentDao.selectMaxStuNumber();
        long startNumber = 0L;
        if (stuNumber != null) {
            startNumber = Long.parseLong(stuNumber);
        }
        // Running total of the values returned by insertBatch.
        long count = 0L;
        int total = size;

        List<Student> students = new ArrayList<>(BATCH_SIZE);
        for (int i = 0; i < total; i++) {
            Student student = new Student();
            // Random name; bounds follow the configured arrays so every entry can be chosen.
            student.setStuName(lastNames[randomInt(lastNames.length)] + firstNames[randomInt(firstNames.length)]);
            // Student number: random enrollment-year prefix followed by the zero-padded sequence.
            student.setStuNumber(stuNumberStarts[randomInt(stuNumberStarts.length)] + generateStuNumber(++startNumber));
            // Pick one college; id and name come from the same entry so they stay paired.
            College college = colleges.get(randomInt(colleges.size()));
            student.setcId(college.getcId());
            student.setcName(college.getcName());
            // Status is false only when randomInt(1000) returns 0, i.e. 0.1% of the time.
            student.setStatus(randomInt(1000) > 0);
            // Random phone number: a configured prefix followed by 8 random digits.
            student.setStuPhoneNumber(phoneNumberStarts[randomInt(phoneNumberStarts.length)] + getRandom(8));
            student.setStuCreateTime(new Date());
            students.add(student);
            // Flush on buffer size rather than on the loop index, so the first flush is a full
            // batch and not the single row produced at i == 0.
            if (students.size() >= BATCH_SIZE) {
                count += studentDao.insertBatch(students);
                students.clear();
            }
        }
        // Flush the remainder; an empty list would render an INSERT with no value tuples.
        if (!students.isEmpty()) {
            count += studentDao.insertBatch(students);
        }
        long end = System.currentTimeMillis();

        System.out.println("用时:"+(end-start));
        return JsonResult.ok("time", end - start).put("size", count);
    }


    /**
     * Returns a random integer in the range [0, n-1].
     *
     * @param n exclusive upper bound; must be positive
     * @return a value between 0 (inclusive) and {@code n} (exclusive)
     * @throws IllegalArgumentException if {@code n} is not positive (thrown by {@link Random#nextInt(int)})
     */
    public static int randomInt(int n) {
        return RANDOM.nextInt(n);
    }

    /**
     * Generates a random string of decimal digits.
     *
     * <p>Each digit is drawn independently, so digits may repeat and leading zeros are kept.
     * Building the text directly (rather than through an {@code int}) avoids overflow for
     * 10 digits and lifts the previous 10-digit limit.
     *
     * @param count number of digits to generate
     * @return a string of exactly {@code count} digits, or an empty string when {@code count <= 0}
     */
    public String getRandom(int count) {
        if (count <= 0) {
            return "";
        }
        StringBuilder digits = new StringBuilder(count);
        for (int i = 0; i < count; i++) {
            digits.append(RANDOM.nextInt(10));
        }
        return digits.toString();
    }

    /**
     * Formats a sequence number as text, left-padded with zeros to {@link #STU_NUMBER_DIGITS} characters.
     *
     * <p>The padding length is computed once from the original text; the previous loop re-read
     * the growing string's length on every iteration and stopped about half-way.
     *
     * @param number the sequence number
     * @return the zero-padded text; values already at least 10 characters long are returned unchanged
     */
    String generateStuNumber(Long number){
        String digits = Long.toString(number);
        StringBuilder padded = new StringBuilder(STU_NUMBER_DIGITS);
        for (int i = digits.length(); i < STU_NUMBER_DIGITS; i++) {
            padded.append('0');
        }
        return padded.append(digits).toString();
    }
}

测试生成50000000数据用时约1380s!

猜你喜欢

转载自blog.csdn.net/qq_41345281/article/details/107710460