1. Project Data Preparation
Import the backed-up data into MySQL:
create database db_novel;
mysql -u root -p199911 db_novel < /opt/shell/novel.sql
# For testing, use only 10,000 rows of the data
create table novel_test as select * from novel limit 10000;
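Before wiring up Logstash, it can be worth confirming that the table is reachable over JDBC with the same driver and credentials used in the rest of this post. A minimal sketch, assuming the MySQL Connector/J JAR is on the classpath and the database runs on bigdata-pro-m04 as in the later configs (this helper class is not part of the original project):
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class CheckNovelTest {
    public static void main(String[] args) throws Exception {
        // Same JDBC URL and credentials that the Logstash and MR jobs below use
        String url = "jdbc:mysql://bigdata-pro-m04:3306/db_novel?useSSL=false";
        try (Connection conn = DriverManager.getConnection(url, "root", "199911");
             Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery("SELECT COUNT(*) FROM novel_test")) {
            rs.next();
            // Expect 10000 after the LIMIT above
            System.out.println("novel_test rows: " + rs.getLong(1));
        }
    }
}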
(Figure: data migration architecture)
2. Migrating Data to ES with Logstash
Logstash data migration:
To run a Logstash data migration you need to write a migration script. First verify the installation with the basic stdin/stdout pipeline template:
cd logstash-6.7.2
bin/logstash -e 'input { stdin { } } output { stdout {} }'
Logstash script reference:
https://www.elastic.co/guide/en/logstash/6.7/index.html
Based on the business requirements and the data format, the Logstash script looks like this:
vim novel.conf
input {
    stdin {
    }
    jdbc {
        jdbc_driver_library => "/opt/shell/mysql-connector-java-5.1.48-bin.jar"
        jdbc_driver_class => "com.mysql.jdbc.Driver"
        jdbc_connection_string => "jdbc:mysql://bigdata-pro-m04:3306/db_novel"
        jdbc_user => "root"
        jdbc_password => "199911"
        statement => "SELECT * from novel_test"
        jdbc_validate_connection => "true"
        jdbc_validation_timeout => "3600"
        connection_retry_attempts => "5"
        jdbc_paging_enabled => "true"
        jdbc_page_size => "1000"
        sql_log_level => "warn"
        lowercase_column_names => "false"
    }
}
filter {
    json {
        source => "message"
        remove_field => ["message"]
    }
}
output {
    elasticsearch {
        hosts => "bigdata-pro-m04:9200"
        index => "novel"
        document_id => "%{id}"
    }
    stdout {
        codec => "json_lines"
    }
}
1. Copy the migration script into the /etc/logstash/conf.d directory.
2. As root, change into /usr/share/logstash and run:
bin/logstash -f /etc/logstash/conf.d/novel.conf
Check whether the import succeeded:
Looking at Kibana shows that the data was imported successfully. You can also query Elasticsearch directly, as shown below.
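As an alternative to Kibana, a quick document count against the _count endpoint verifies the index from plain Java (a minimal sketch using only the JDK; the host and the index name novel come from the config above):
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;

public class CheckNovelIndex {
    public static void main(String[] args) throws Exception {
        // _count returns a small JSON body such as {"count":10000,...}
        URL url = new URL("http://bigdata-pro-m04:9200/novel/_count");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestMethod("GET");
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), "UTF-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        }
    }
}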
3. Migrating Data to HBase with MR
Create the HBase table:
create 'novel_detail','cf'
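The same table can also be created from the Java client instead of the HBase shell (a sketch against the HBase 1.x admin API; not required if you already ran the shell command above):
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

public class CreateNovelDetailTable {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Admin admin = connection.getAdmin()) {
            TableName tableName = TableName.valueOf("novel_detail");
            if (!admin.tableExists(tableName)) {
                // One column family 'cf', matching the shell command above
                HTableDescriptor descriptor = new HTableDescriptor(tableName);
                descriptor.addFamily(new HColumnDescriptor("cf"));
                admin.createTable(descriptor);
            }
        }
    }
}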
The MySQL data is then imported into the HBase table with an MR program, starting with the bean that maps one row:
package com.data.migrate;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

/**
 * @author :caizhengjie
 * @description: one row of the novel_detail table; DBWritable lets DBInputFormat read it
 *               from MySQL, and WritableComparable lets it serve as the map output key
 * @date :2021/2/3 10:46 PM
 */
public class NovelDetail implements WritableComparable<NovelDetail>, DBWritable {

    private int id;
    private String chapterName;
    private String authorName;
    private String novelName;
    private String chapterUrl;

    public NovelDetail() {
    }

    public NovelDetail(int id, String authorName, String novelName, String chapterName, String chapterUrl) {
        this.id = id;
        this.authorName = authorName;
        this.novelName = novelName;
        this.chapterName = chapterName;
        this.chapterUrl = chapterUrl;
    }

    public int getId() {
        return id;
    }

    public void setId(int id) {
        this.id = id;
    }

    public String getAuthorName() {
        return authorName;
    }

    public void setAuthorName(String authorName) {
        this.authorName = authorName;
    }

    public String getNovelName() {
        return novelName;
    }

    public void setNovelName(String novelName) {
        this.novelName = novelName;
    }

    public String getChapterName() {
        return chapterName;
    }

    public void setChapterName(String chapterName) {
        this.chapterName = chapterName;
    }

    public String getChapterUrl() {
        return chapterUrl;
    }

    public void setChapterUrl(String chapterUrl) {
        this.chapterUrl = chapterUrl;
    }

    @Override
    public int compareTo(NovelDetail o) {
        return Integer.compare(this.id, o.id);
    }

    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeInt(id);
        dataOutput.writeUTF(authorName);
        dataOutput.writeUTF(novelName);
        dataOutput.writeUTF(chapterName);
        dataOutput.writeUTF(chapterUrl);
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.id = dataInput.readInt();
        this.authorName = dataInput.readUTF();
        this.novelName = dataInput.readUTF();
        this.chapterName = dataInput.readUTF();
        this.chapterUrl = dataInput.readUTF();
    }

    @Override
    public void write(PreparedStatement preparedStatement) throws SQLException {
        // As in plain JDBC, bind each field to the PreparedStatement in column order
        int index = 1;
        preparedStatement.setInt(index++, id);
        preparedStatement.setString(index++, authorName);
        preparedStatement.setString(index++, novelName);
        preparedStatement.setString(index++, chapterName);
        preparedStatement.setString(index++, chapterUrl);
    }

    @Override
    public void readFields(ResultSet resultSet) throws SQLException {
        // As in a plain JDBC query, read each column from the ResultSet in order
        int index = 1;
        id = resultSet.getInt(index++);
        authorName = resultSet.getString(index++);
        novelName = resultSet.getString(index++);
        chapterName = resultSet.getString(index++);
        chapterUrl = resultSet.getString(index++);
    }
}
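A quick way to sanity-check the Writable half of this bean is a serialization round trip through a byte stream, the same way Hadoop moves it between map and reduce (a hypothetical test, not part of the original project):
package com.data.migrate;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

public class NovelDetailRoundTrip {
    public static void main(String[] args) throws Exception {
        NovelDetail in = new NovelDetail(1, "author", "novel", "chapter", "http://example.com/1");

        // Serialize with write(), then deserialize with readFields()
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        in.write(new DataOutputStream(bytes));

        NovelDetail out = new NovelDetail();
        out.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

        // The fields should survive the round trip unchanged
        System.out.println(out.getId() + " " + out.getNovelName() + " " + out.getChapterName());
    }
}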
Next, the MD5 helper used when building rowkeys:
package com.data.migrate;

import org.apache.commons.codec.digest.DigestUtils;

/**
 * @author :caizhengjie
 * @description: MD5 helper used when building HBase rowkeys
 * @date :2021/2/4 9:55 AM
 */
public class JavaUtils {

    /**
     * MD5
     * @param key the string to hash
     * @return the MD5 digest of key as a 32-character hex string
     */
    public static String md5(String key) {
        return DigestUtils.md5Hex(key);
    }
}
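As a quick usage check, hashing a chapter URL yields the fixed-length suffix that goes into the rowkey below (the URL here is a made-up placeholder):
package com.data.migrate;

public class Md5Demo {
    public static void main(String[] args) {
        // Prints a 32-character hex digest; identical input always gives the same digest
        System.out.println(JavaUtils.md5("http://www.example.com/novel/1/1.html"));
    }
}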
Finally, the job itself:
package com.data.migrate;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBInputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;

/**
 * @author :caizhengjie
 * @description: migrate MySQL data to HBase
 * @date :2021/2/4 8:42 AM
 */
public class MysqlHBaseMapReduce {

    /**
     * map: pass each row read from MySQL straight through as the key
     */
    public static class MysqlHBaseMapper extends Mapper<LongWritable, NovelDetail, NovelDetail, NullWritable> {
        @Override
        protected void map(LongWritable key, NovelDetail value, Context context) throws IOException, InterruptedException {
            context.write(value, NullWritable.get());
        }
    }

    /**
     * reduce: turn each row into an HBase Put
     */
    public static class MysqlHBaseReducer extends TableReducer<NovelDetail, NullWritable, ImmutableBytesWritable> {
        @Override
        protected void reduce(NovelDetail key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            int detailId = key.getId();
            String authorName = key.getAuthorName();
            String novelName = key.getNovelName();
            String chapterName = key.getChapterName();
            String chapterUrl = key.getChapterUrl();
            // Rowkey design: current timestamp + random number + MD5 of the chapter URL,
            // which keeps keys unique and spreads writes across regions
            String rowkey = System.currentTimeMillis() + Math.random() + JavaUtils.md5(chapterUrl);
            // An HBase Put is created from the rowkey first
            final Put put = new Put(Bytes.toBytes(rowkey));
            // ids are unique, so each key carries a single value; populate the columns
            for (NullWritable value : values) {
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("id"), Bytes.toBytes(detailId));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("authorName"), Bytes.toBytes(authorName));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("novelName"), Bytes.toBytes(novelName));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("chapterName"), Bytes.toBytes(chapterName));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("chapterUrl"), Bytes.toBytes(chapterUrl));
            }
            // key: rowkey bytes, value: the Put
            context.write(new ImmutableBytesWritable(Bytes.toBytes(rowkey)), put);
        }
    }

    public static class MysqlHBaseDriver extends Configured implements Tool {

        public static void main(String[] args) throws Exception {
            // Build a Configuration object that includes the HBase settings
            Configuration configuration = HBaseConfiguration.create();
            ToolRunner.run(configuration, new MysqlHBaseDriver(), args);
        }

        @Override
        public int run(String[] strings) throws Exception {
            // get conf
            Configuration conf = this.getConf();
            // Configure the MySQL URL, user, and password
            DBConfiguration.configureDB(conf,
                    "com.mysql.jdbc.Driver",
                    "jdbc:mysql://bigdata-pro-m04:3306/db_novel?useSSL=false",
                    "root",
                    "199911");
            // create job
            Job job = Job.getInstance(conf, this.getClass().getSimpleName());
            job.setJarByClass(MysqlHBaseDriver.class);
            // map: set the mapper and its output types
            job.setMapperClass(MysqlHBaseMapper.class);
            job.setMapOutputKeyClass(NovelDetail.class);
            job.setMapOutputValueClass(NullWritable.class);
            // set reducer and output: store the data in the HBase table novel_detail
            TableMapReduceUtil.initTableReducerJob("novel_detail", MysqlHBaseReducer.class, job);
            // Read the job input from the database
            job.setInputFormatClass(DBInputFormat.class);
            // job, the DBWritable class, table name, WHERE conditions, ORDER BY column, columns to read
            DBInputFormat.setInput(job, NovelDetail.class,
                    "novel_detail", null, "id", "id",
                    "author_name", "novel_name", "chapter_name", "chapter_url");
            return job.waitForCompletion(true) ? 0 : 1;
        }
    }
}
Run the job; when it completes, the console prints the MapReduce counters:
21/02/04 14:32:37 INFO mapreduce.Job: Counters: 35
    File System Counters
        FILE: Number of bytes read=7070394052
        FILE: Number of bytes written=8471690289
        FILE: Number of read operations=0
        FILE: Number of large read operations=0
        FILE: Number of write operations=0
        HDFS: Number of bytes read=0
        HDFS: Number of bytes written=0
        HDFS: Number of read operations=0
        HDFS: Number of large read operations=0
        HDFS: Number of write operations=0
    Map-Reduce Framework
        Map input records=10409794
        Map output records=10409794
        Map output bytes=1373170312
        Map output materialized bytes=1400187855
        Input split bytes=78
        Combine input records=0
        Combine output records=0
        Reduce input groups=10409794
        Reduce shuffle bytes=1400187855
        Reduce input records=10409794
        Reduce output records=10409794
        Spilled Records=36609312
        Shuffled Maps =1
        Failed Shuffles=0
        Merged Map outputs=1
        GC time elapsed (ms)=3513
        Total committed heap usage (bytes)=632291328
    Shuffle Errors
        BAD_ID=0
        CONNECTION=0
        IO_ERROR=0
        WRONG_LENGTH=0
        WRONG_MAP=0
        WRONG_REDUCE=0
    File Input Format Counters
        Bytes Read=0
    File Output Format Counters
        Bytes Written=0
Process finished with exit code 0
Check the data in HBase; the row count confirms the import succeeded:
count 'novel_detail'
10409794 row(s) in 370.3170 seconds
=> 10409794
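Beyond the row count, a few sample rows can be spot-checked from the Java client (a minimal sketch against the HBase 1.x client API, not part of the original walkthrough; note that the id column was written as a 4-byte int, so it is decoded with Bytes.toInt):
package com.data.migrate;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class ScanNovelDetail {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Table table = connection.getTable(TableName.valueOf("novel_detail"));
             ResultScanner scanner = table.getScanner(new Scan())) {
            int shown = 0;
            for (Result result : scanner) {
                // id was stored with Bytes.toBytes(int); the other columns are strings
                int id = Bytes.toInt(result.getValue(Bytes.toBytes("cf"), Bytes.toBytes("id")));
                String novelName = Bytes.toString(result.getValue(Bytes.toBytes("cf"), Bytes.toBytes("novelName")));
                String chapterName = Bytes.toString(result.getValue(Bytes.toBytes("cf"), Bytes.toBytes("chapterName")));
                System.out.println(id + "\t" + novelName + "\t" + chapterName);
                if (++shown >= 5) {
                    break; // only spot-check the first five rows
                }
            }
        }
    }
}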