准备工作:
1.数据库表:datatest5
2.字段:id(主键,自增);val(存在重复值的字段,需要为 val 创建索引)
3.设置innodb缓冲池大小
-- Inspect the current buffer-related server variables; the pattern matches
-- innodb_buffer_pool_size among others. Single quotes are used so the literal
-- is still a string when the ANSI_QUOTES sql_mode is enabled.
SHOW VARIABLES LIKE '%_buffer%';
-- Original server value recorded before the test. Run this statement again
-- after the de-duplication has finished to restore the setting.
SET GLOBAL innodb_buffer_pool_size = 8388608;
-- Enlarged value used while the de-duplication runs.
-- NOTE(review): the server may round this to a multiple of its buffer pool
-- chunk size; confirm the effective value with the SHOW VARIABLES query above.
SET GLOBAL innodb_buffer_pool_size = 102400000;
/**
 * De-duplicates table datatest5 on column val and prints timing information
 * for every step.
 *
 * Each pass collects the smallest id of every val that still occurs more than
 * once and deletes exactly those rows, so after the last pass the row with the
 * largest id of each duplicated val is the one that remains. Rows whose val is
 * NULL are never treated as duplicates because COUNT(val) ignores NULLs.
 *
 * Uses the jdbcTemplate field of the enclosing class (presumably a Spring
 * JdbcTemplate; it is declared outside this block).
 */
public void distinct() {
    long startedAt = System.currentTimeMillis();

    // Step 1: total number of rows before de-duplication (sample run: 57667549).
    long sum = queryForCount(" SELECT COUNT(0) ct from datatest5 ");
    System.out.println(sum + "=sum");
    long afterStep1 = System.currentTimeMillis();
    System.out.println(afterStep1 - startedAt); // sample run: 24924 ms

    // Step 2: number of distinct non-NULL values, i.e. the expected row count
    // once every duplicate is gone (sample run: 52328013).
    long target = queryForCount(" SELECT COUNT(DISTINCT val) ct FROM datatest5 ");
    System.out.println(target + "=target");
    long afterStep2 = System.currentTimeMillis();
    System.out.println(afterStep2 - afterStep1); // sample run: 52639 ms

    // Step 3: size of the largest duplicate group (sample run: 3). MAX() over
    // an empty set is NULL when nothing is duplicated; queryForCount maps that
    // to 0 instead of failing with a NullPointerException.
    long maxct = queryForCount(
            " SELECT MAX(a.ct) maxct from (SELECT COUNT(0) ct FROM datatest5 GROUP BY val HAVING COUNT( val ) >1) a ");
    System.out.println(maxct + "=maxct");
    long afterStep3 = System.currentTimeMillis();
    System.out.println(afterStep3 - afterStep2); // sample run: 39871 ms

    // Step 4: every pass removes one row from each duplicated group, so a group
    // of maxct rows needs maxct - 1 passes. Each pass scans the whole table
    // (about 20 minutes per pass was measured on the 57.6M-row sample), which
    // is why the redundant final pass is skipped.
    System.out.println("进入循环");
    for (long pass = 1; pass < maxct; pass++) {
        long passStartedAt = System.currentTimeMillis();

        // A scratch table left behind by an aborted run would make CREATE TABLE fail.
        jdbcTemplate.execute("DROP TABLE IF EXISTS idtable5");
        try {
            // Smallest id of every val that still has more than one row.
            jdbcTemplate.execute(
                    "CREATE TABLE idtable5 AS"
                    + " SELECT MIN(id) AS id FROM datatest5 GROUP BY val HAVING COUNT(val) > 1");
            // Delete exactly those rows from the source table.
            jdbcTemplate.execute(
                    "DELETE a FROM datatest5 a INNER JOIN idtable5 t ON a.id = t.id");
        } finally {
            // Always clean up, even when one of the statements above failed.
            jdbcTemplate.execute("DROP TABLE IF EXISTS idtable5");
        }

        long passFinishedAt = System.currentTimeMillis();
        System.out.println();
        System.out.println(passFinishedAt - passStartedAt);
        System.out.println();
    }
    System.out.println("循环体结束");

    // Step 5: total number of rows after de-duplication (sample run: 52328013).
    long countStartedAt = System.currentTimeMillis();
    long results = queryForCount(" SELECT COUNT(0) ct from datatest5 ");
    System.out.println(results + "=results");
    long countFinishedAt = System.currentTimeMillis();
    System.out.println(countFinishedAt - countStartedAt);

    // Step 6: the run succeeded when the remaining row count equals the number
    // of distinct values. Rows with a NULL val are counted in results but not
    // in target, so they show up here as a mismatch.
    if (results == target) {
        System.out.println("成功!!!");
    } else {
        System.out.println("失败!!! results=" + results + ", target=" + target);
    }
}

/**
 * Runs a query that yields a single numeric value and returns it as a long.
 *
 * @param sql statement selecting exactly one row with one numeric column
 * @return the selected value, or 0 when the database returns NULL
 */
private long queryForCount(String sql) {
    Long value = jdbcTemplate.queryForObject(sql, Long.class);
    return value == null ? 0L : value;
}
一共用时65分钟;
借鉴文章: