需求:有两个数据库A、B,现在要对A库中的一个字段加密后,插入到B库的表中。
业务分析:业务逻辑比较简单,从A库中拿到数据,数据清洗,插入B库中
业务难点:A库中表的数据量比较大(50W条数据,相对意义上的大数据量),单纯用 select * from table 查询耗时很长,而且容易内存溢出;数据清洗完插入B库的效率也很低。我这里采取分批次处理,并且采用线程池多线程处理数据。
1.controller层
/**
 * Walks the id range of the source table in fixed-size steps and hands each
 * step to {@code tempVehicleTInfoService.updateVehicleUUID}.
 *
 * <p>Every call receives a parameter map holding the starting id of the
 * window ("id"), the page size ("limit") and the sort column/direction
 * ("sort"/"order"). The cursor advances by {@code limit} after each call and
 * the loop ends once the cursor has moved past the largest id.
 *
 * <p>NOTE(review): the {@code listByIdScope} query shown in this document only
 * applies a lower bound ({@code id >= #{id}}) plus a row limit, so windows can
 * overlap when ids are not contiguous — confirm whether that is acceptable.
 */
@RequestMapping("/vehicle/updateUid")
@ResponseBody
public void vehicleInfoProcess() {
    // Start of the first window: the smallest id in the table.
    int id = tempVehicleTInfoService.getMinId();
    // Rows requested per batch; also the stride of the id cursor.
    int limit = 1000;
    // Largest id in the table; the cursor must cover it.
    int maxId = tempVehicleTInfoService.getMaxId();
    // do/while: at least one batch is always dispatched.
    do {
        Map<String, Object> params = new HashMap<>(16);
        params.put("id", id);
        params.put("limit", limit);
        params.put("sort", "id");
        params.put("order", "asc");
        tempVehicleTInfoService.updateVehicleUUID(params);
        id += limit;
        // "<=" rather than "<": when the cursor lands exactly on maxId, that
        // last row still needs a batch of its own or it is silently skipped.
    } while (id <= maxId);
}
2.service层
/**
 * Loads one batch of rows through {@code listByIdScope}, assigns every row a
 * freshly generated UUID and writes the whole batch back with a single
 * {@code batchUpdate} call.
 *
 * <p>An empty (or null) batch is skipped instead of being passed to
 * {@code batchUpdate}, so no update statement is issued for zero rows.
 *
 * @param map query parameters forwarded unchanged to {@code listByIdScope};
 *            its "id" entry is written to the log after the batch finishes
 */
@Override
@Async("taskExecutor")
@Transactional(rollbackFor = Exception.class)
public void updateVehicleUUID(Map<String, Object> map) {
    List<TempVehicleTInfoDO> tempVehicleTInfoDOS = tempVehicleTInfoDao.listByIdScope(map);
    if (tempVehicleTInfoDOS != null && !tempVehicleTInfoDOS.isEmpty()) {
        for (TempVehicleTInfoDO tempVehicleTInfoDO : tempVehicleTInfoDOS) {
            // Data cleansing step: give each row a new UUID.
            tempVehicleTInfoDO.setUid(UUIDUtils.getUUID());
        }
        // Write the batch in one call; updating row by row is far slower.
        tempVehicleTInfoDao.batchUpdate(tempVehicleTInfoDOS);
    }
    log.info("当前id:" + map.get("id"));
}
3.mapper文件
//分批次查询A库数据的sql
<!--
  Selects the `id` column of rows whose id is at or above the given start id,
  ordered by the requested column/direction (default: id desc) and optionally
  capped by `limit`.
-->
<select id="listByIdScope" resultType="com.qgnjjl.data.domain.TempVehicleTInfoDO">
    select `id` from temp_vehicle_t_info
    <where>
        <!--
          Compare the string form against '': testing a numeric id directly
          with id != '' makes OGNL treat 0 as equal to '', which would drop
          this filter for id = 0. Empty-string ids are still skipped.
        -->
        <if test="id != null and id.toString() != ''"> and id >= #{id} </if>
    </where>
    <choose>
        <when test="sort != null and sort.trim() != ''">
            <!-- ${...} is raw text substitution, not a bound parameter:
                 only pass trusted, server-side values for sort/order. -->
            order by ${sort} ${order}
        </when>
        <otherwise>
            order by id desc
        </otherwise>
    </choose>
    <if test="limit != null">
        limit #{limit}
    </if>
</select>
<!--
  Largest id in temp_vehicle_t_info.
  NOTE(review): MAX() yields NULL for an empty table; confirm how the mapper
  method is expected to behave in that case.
-->
<select id="getMaxId" resultType="int">
    select max(`id`)
    from temp_vehicle_t_info
</select>
<!--
  Smallest id in temp_vehicle_t_info.
  NOTE(review): MIN() yields NULL for an empty table; confirm how the mapper
  method is expected to behave in that case.
-->
<select id="getMinId" resultType="int">
    select min(`id`)
    from temp_vehicle_t_info
</select>
4.线程池配置
/**
 * Enables Spring's asynchronous method execution and registers the
 * {@code taskExecutor} thread pool bean.
 */
@EnableAsync
@Configuration
public class ExecutorConfig {

    /** Number of core threads. */
    private static final int CORE_POOL_SIZE = 10;

    /** Upper bound on the number of threads. */
    private static final int MAX_POOL_SIZE = 50;

    /** Capacity of the task queue. */
    private static final int QUEUE_CAPACITY = 10;

    /** Keep-alive time, in seconds, passed to the pool. */
    private static final int KEEP_ALIVE_SECONDS = 150;

    /**
     * Builds the thread pool exposed under the bean name {@code taskExecutor}.
     *
     * @return the initialized executor
     */
    @Bean("taskExecutor")
    public Executor taskExecutor() {
        ThreadPoolTaskExecutor pool = new ThreadPoolTaskExecutor();
        pool.setThreadNamePrefix("taskExecutor-");

        // Sizing of the pool and its queue.
        pool.setCorePoolSize(CORE_POOL_SIZE);
        pool.setMaxPoolSize(MAX_POOL_SIZE);
        pool.setQueueCapacity(QUEUE_CAPACITY);
        pool.setKeepAliveSeconds(KEEP_ALIVE_SECONDS);

        // Rejection policy for tasks the pool cannot accept:
        // CallerRunsPolicy runs the task on the submitting thread instead of
        // a pool thread.
        pool.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy());

        pool.initialize();
        return pool;
    }
}
5.数据库连接池
spring:
  datasource:
    type: com.alibaba.druid.pool.DruidDataSource
    driverClassName: com.mysql.jdbc.Driver
    url: jdbc:mysql://ip:port/ku?serverTimezone=UTC&useUnicode=true&characterEncoding=utf8&allowMultiQueries=true
    # NOTE(review): DruidDBConfig in this document injects
    # spring.datasource.username / password, so they must be supplied
    # somewhere (here or via another property source).
    # username: root
    # password: passw0rd
    initialSize: 1
    minIdle: 3
    maxActive: 20
    # Maximum time to wait when acquiring a connection
    maxWait: 60000
    # Interval between runs that look for idle connections to close, in milliseconds
    timeBetweenEvictionRunsMillis: 60000
    # Minimum time a connection stays in the pool, in milliseconds
    minEvictableIdleTimeMillis: 30000
    validationQuery: select 'x'
    testWhileIdle: true
    testOnBorrow: false
    testOnReturn: false
    # Enable PSCache and set its size per connection
    poolPreparedStatements: true
    maxPoolPreparedStatementPerConnectionSize: 20
    # Filters for monitoring/statistics; without them the monitoring page cannot
    # collect SQL stats. 'wall' (SQL firewall) is intentionally not listed.
    filters: stat,slf4j
    # connectionProperties turns on mergeSql and slow-SQL recording
    connectionProperties: druid.stat.mergeSql=true;druid.stat.slowSqlMillis=5000
    # Merge monitoring data of multiple DruidDataSource instances
    #useGlobalDataSourceStat: true
    removeAbandoned: true
    removeAbandonedTimeout: 300
    logAbandoned: false
6.数据库连接池配置
/**
 * Builds the primary Druid {@link DataSource} from the
 * {@code spring.datasource.*} properties and registers Druid's monitoring
 * servlet and web statistics filter.
 */
@Configuration
public class DruidDBConfig {

    private static final Logger logger = LoggerFactory.getLogger(DruidDBConfig.class);

    @Value("${spring.datasource.url}")
    private String dbUrl;

    @Value("${spring.datasource.username}")
    private String username;

    @Value("${spring.datasource.password}")
    private String password;

    @Value("${spring.datasource.driverClassName}")
    private String driverClassName;

    @Value("${spring.datasource.initialSize}")
    private int initialSize;

    @Value("${spring.datasource.minIdle}")
    private int minIdle;

    @Value("${spring.datasource.maxActive}")
    private int maxActive;

    @Value("${spring.datasource.maxWait}")
    private int maxWait;

    @Value("${spring.datasource.timeBetweenEvictionRunsMillis}")
    private int timeBetweenEvictionRunsMillis;

    @Value("${spring.datasource.minEvictableIdleTimeMillis}")
    private int minEvictableIdleTimeMillis;

    @Value("${spring.datasource.validationQuery}")
    private String validationQuery;

    @Value("${spring.datasource.testWhileIdle}")
    private boolean testWhileIdle;

    @Value("${spring.datasource.testOnBorrow}")
    private boolean testOnBorrow;

    @Value("${spring.datasource.testOnReturn}")
    private boolean testOnReturn;

    @Value("${spring.datasource.poolPreparedStatements}")
    private boolean poolPreparedStatements;

    @Value("${spring.datasource.maxPoolPreparedStatementPerConnectionSize}")
    private int maxPoolPreparedStatementPerConnectionSize;

    @Value("${spring.datasource.filters}")
    private String filters;

    // Fixed: the placeholder was missing its leading '$', so the literal text
    // "{spring.datasource.connectionProperties}" was injected instead of the
    // configured value.
    @Value("${spring.datasource.connectionProperties}")
    private String connectionProperties;

    /**
     * Creates the Druid data source and applies every injected pool setting.
     *
     * <p>A failure to install the configured filters is logged and does not
     * prevent the data source from being returned.
     *
     * @return the configured data source, marked as the primary one
     */
    @Bean(initMethod = "init", destroyMethod = "close")
    @Primary
    public DataSource dataSource() {
        DruidDataSource datasource = new DruidDataSource();

        // Connection settings.
        datasource.setUrl(this.dbUrl);
        datasource.setUsername(username);
        datasource.setPassword(password);
        datasource.setDriverClassName(driverClassName);

        // Pool settings.
        datasource.setInitialSize(initialSize);
        datasource.setMinIdle(minIdle);
        datasource.setMaxActive(maxActive);
        datasource.setMaxWait(maxWait);
        datasource.setTimeBetweenEvictionRunsMillis(timeBetweenEvictionRunsMillis);
        datasource.setMinEvictableIdleTimeMillis(minEvictableIdleTimeMillis);
        datasource.setValidationQuery(validationQuery);
        datasource.setTestWhileIdle(testWhileIdle);
        datasource.setTestOnBorrow(testOnBorrow);
        datasource.setTestOnReturn(testOnReturn);
        datasource.setPoolPreparedStatements(poolPreparedStatements);
        datasource.setMaxPoolPreparedStatementPerConnectionSize(maxPoolPreparedStatementPerConnectionSize);

        try {
            datasource.setFilters(filters);
        } catch (SQLException e) {
            // Best effort: keep going without the filters, but record why.
            logger.error("druid configuration initialization filter", e);
        }
        datasource.setConnectionProperties(connectionProperties);
        return datasource;
    }

    /**
     * Registers Druid's {@code StatViewServlet} under {@code /druid/*}.
     *
     * @return the servlet registration
     */
    @Bean
    public ServletRegistrationBean druidServlet() {
        ServletRegistrationBean reg = new ServletRegistrationBean();
        reg.setServlet(new StatViewServlet());
        reg.addUrlMappings("/druid/*");
        // Whitelist is left empty.
        // NOTE(review): presumably an empty whitelist permits every client — confirm.
        reg.addInitParameter("allow", "");
        return reg;
    }

    /**
     * Registers Druid's {@code WebStatFilter} for all URLs, excluding static
     * resources and the Druid console itself.
     *
     * @return the filter registration
     */
    @Bean
    public FilterRegistrationBean filterRegistrationBean() {
        FilterRegistrationBean filterRegistrationBean = new FilterRegistrationBean();
        filterRegistrationBean.setFilter(new WebStatFilter());
        filterRegistrationBean.addUrlPatterns("/*");
        filterRegistrationBean.addInitParameter("exclusions", "*.js,*.gif,*.jpg,*.png,*.css,*.ico,/druid/*");
        filterRegistrationBean.addInitParameter("profileEnable", "true");
        filterRegistrationBean.addInitParameter("principalCookieName", "USER_COOKIE");
        filterRegistrationBean.addInitParameter("principalSessionName", "USER_SESSION");
        filterRegistrationBean.addInitParameter("DruidWebStatFilter", "/*");
        return filterRegistrationBean;
    }
}
文末:批量插入或批量更新时,需要在配置文件的数据库连接 url 中加上 &allowMultiQueries=true;并且 filters 中不能配置 wall(wall 过滤器默认会拦截一次执行多条语句的 SQL),所以这里配置为 filters: stat,slf4j