Data Preparation and Data Migration

1. Project Data Preparation

Import the backed-up data into MySQL:

create database db_novel;

mysql -u root -p199911 </opt/shell/novel.sql db_novel

# Limit the test dataset to 10,000 rows
create table novel_test as select * from novel limit 10000;
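
A quick count confirms that the sample table has the expected number of rows (a minimal check, using the same credentials as above):

mysql -u root -p199911 -e "SELECT COUNT(*) FROM db_novel.novel_test;"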

Data migration architecture:
[Figure: data migration architecture]

2. Migrating Data to Elasticsearch with Logstash

Logstash data migration:
[Figure: Logstash data migration flow]

To start a Logstash data migration you need to write a migration script; a basic template (stdin in, stdout out) looks like this:

cd logstash-6.7.2
bin/logstash -e 'input { stdin { } } output { stdout {} }'

Logstash configuration reference:
https://www.elastic.co/guide/en/logstash/6.7/index.html

Based on the business requirements and the data format, the Logstash script is written as follows:

vim novel.conf

input {
    stdin {
    }
    jdbc {
        jdbc_driver_library => "/opt/shell/mysql-connector-java-5.1.48-bin.jar"
        jdbc_driver_class => "com.mysql.jdbc.Driver"
        jdbc_connection_string => "jdbc:mysql://bigdata-pro-m04:3306/db_novel"
        jdbc_user => "root"
        jdbc_password => "199911"
        statement => "SELECT * from novel_test"
        jdbc_validate_connection => "true"
        jdbc_validation_timeout => "3600"
        connection_retry_attempts => "5"
        jdbc_paging_enabled => "true"
        jdbc_page_size => "1000"
        sql_log_level => "warn"
        lowercase_column_names => "false"
    }
}

filter {
    json {
        source => "message"
        remove_field => ["message"]
    }
}

output {
    elasticsearch {
        hosts => "bigdata-pro-m04:9200"
        index => "novel"
        document_id => "%{id}"
    }
    stdout {
        codec => "json_lines"
    }
}

1. Copy the migration config file to the /etc/logstash/conf.d directory.
2. As the root user, change into /usr/share/logstash and run the following command:

bin/logstash -f /etc/logstash/conf.d/novel.conf
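
Optionally, the config can be syntax-checked before the actual run (the --config.test_and_exit flag is available in Logstash 6.x):

bin/logstash -f /etc/logstash/conf.d/novel.conf --config.test_and_exit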

Check whether the data was imported successfully:

[Figure: verification in Kibana]

Checking in Kibana shows that the data was imported successfully.
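
Besides Kibana, you can also query Elasticsearch directly for the document count of the novel index (host and port as configured above; the count should match the roughly 10,000 rows of novel_test):

curl -XGET 'http://bigdata-pro-m04:9200/novel/_count?pretty'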

3. Migrating Data to HBase with MapReduce

Create the HBase table:

create 'novel_detail','cf'
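
To confirm the table and its cf column family were created, you can describe it in the hbase shell:

describe 'novel_detail'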

Use a MapReduce program to import the MySQL data into the HBase table. The program consists of three classes: NovelDetail (a bean implementing WritableComparable and DBWritable), JavaUtils (an MD5 helper), and MysqlHBaseMapReduce (mapper, reducer, and driver).

package com.data.migrate;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.lib.db.DBWritable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

/**
 * Chapter record bean: implements WritableComparable for the MapReduce
 * shuffle and DBWritable for reading rows from MySQL.
 *
 * @author caizhengjie
 * @date 2021/2/3 10:46 PM
 */
public class NovelDetail implements WritableComparable<NovelDetail>, DBWritable {

    private int id;
    private String chapterName;
    private String authorName;
    private String novelName;
    private String chapterUrl;

    public NovelDetail() {
    }

    public NovelDetail(int id, String authorName, String novelName, String chapterName, String chapterUrl) {
        this.id = id;
        this.authorName = authorName;
        this.novelName = novelName;
        this.chapterName = chapterName;
        this.chapterUrl = chapterUrl;
    }

    public int getId() {
        return id;
    }

    public void setId(int id) {
        this.id = id;
    }

    public String getAuthorName() {
        return authorName;
    }

    public void setAuthorName(String authorName) {
        this.authorName = authorName;
    }

    public String getNovelName() {
        return novelName;
    }

    public void setNovelName(String novelName) {
        this.novelName = novelName;
    }

    public String getChapterName() {
        return chapterName;
    }

    public void setChapterName(String chapterName) {
        this.chapterName = chapterName;
    }

    public String getChapterUrl() {
        return chapterUrl;
    }

    public void setChapterUrl(String chapterUrl) {
        this.chapterUrl = chapterUrl;
    }

    @Override
    public int compareTo(NovelDetail o) {
        return this.id - o.id;
    }

    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeInt(id);
        dataOutput.writeUTF(authorName);
        dataOutput.writeUTF(novelName);
        dataOutput.writeUTF(chapterName);
        dataOutput.writeUTF(chapterUrl);
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.id = dataInput.readInt();
        this.authorName = dataInput.readUTF();
        this.novelName = dataInput.readUTF();
        this.chapterName = dataInput.readUTF();
        this.chapterUrl = dataInput.readUTF();
    }

    @Override
    public void write(PreparedStatement preparedStatement) throws SQLException {
        // As with plain JDBC, bind the field values through the PreparedStatement
        int index = 1;
        preparedStatement.setInt(index++, id);
        preparedStatement.setString(index++, authorName);
        preparedStatement.setString(index++, novelName);
        preparedStatement.setString(index++, chapterName);
        preparedStatement.setString(index++, chapterUrl);
    }

    @Override
    public void readFields(ResultSet resultSet) throws SQLException {
        // As with a plain JDBC query, read the fields from the ResultSet
        int index = 1;
        id = resultSet.getInt(index++);
        authorName = resultSet.getString(index++);
        novelName = resultSet.getString(index++);
        chapterName = resultSet.getString(index++);
        chapterUrl = resultSet.getString(index++);
    }
}

package com.data.migrate;

import org.apache.commons.codec.digest.DigestUtils;

/**
 * Small helper for hashing strings.
 *
 * @author caizhengjie
 * @date 2021/2/4 9:55 AM
 */
public class JavaUtils {

    /**
     * MD5-hash the given key.
     *
     * @return the hex-encoded MD5 digest
     */
    public static String md5(String key) {
        return DigestUtils.md5Hex(key);
    }
}

package com.data.migrate;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.lib.db.DBConfiguration;
import org.apache.hadoop.mapred.lib.db.DBInputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;

/**
 * Migrates MySQL data into HBase.
 *
 * @author caizhengjie
 * @date 2021/2/4 8:42 AM
 */
public class MysqlHBaseMapReduce {

    /**
     * Mapper: passes each NovelDetail row through as the output key.
     */
    public static class MysqlHBaseMapper extends Mapper<LongWritable, NovelDetail, NovelDetail, NullWritable> {

        @Override
        protected void map(LongWritable key, NovelDetail value, Context context) throws IOException, InterruptedException {
            context.write(value, NullWritable.get());
        }
    }

    /**
     * Reducer: writes each record into the HBase table.
     */
    public static class MysqlHBaseReducer extends TableReducer<NovelDetail, NullWritable, ImmutableBytesWritable> {

        @Override
        protected void reduce(NovelDetail key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            int detailId = key.getId();
            String authorName = key.getAuthorName();
            String novelName = key.getNovelName();
            String chapterName = key.getChapterName();
            String chapterUrl = key.getChapterUrl();

            // Rowkey design: current timestamp + random number + MD5 of the chapter URL
            String rowkey = System.currentTimeMillis() + Math.random() + JavaUtils.md5(chapterUrl);

            // In the HBase API, the rowkey is set when the Put is created
            final Put put = new Put(Bytes.toBytes(rowkey));

            // Then iterate over the values and add the columns
            for (NullWritable value : values) {
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("id"), Bytes.toBytes(detailId));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("authorName"), Bytes.toBytes(authorName));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("novelName"), Bytes.toBytes(novelName));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("chapterName"), Bytes.toBytes(chapterName));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("chapterUrl"), Bytes.toBytes(chapterUrl));
            }

            // Emit the rowkey and the Put
            context.write(new ImmutableBytesWritable(Bytes.toBytes(rowkey)), put);
        }
    }

    public static class MysqlHBaseDriver extends Configured implements Tool {

        public static void main(String[] args) throws Exception {
            // Create the Configuration object and load the HBase settings
            Configuration configuration = HBaseConfiguration.create();
            ToolRunner.run(configuration, new MysqlHBaseDriver(), args);
        }

        @Override
        public int run(String[] strings) throws Exception {
            // get conf
            Configuration conf = this.getConf();

            // Configure the MySQL driver, URL, user and password
            DBConfiguration.configureDB(conf,
                    "com.mysql.jdbc.Driver",
                    "jdbc:mysql://bigdata-pro-m04:3306/db_novel?useSSL=false",
                    "root",
                    "199911");

            // create job
            Job job = Job.getInstance(conf, this.getClass().getSimpleName());
            job.setJarByClass(MysqlHBaseDriver.class);

            // map: set the mapper and its output types
            job.setMapperClass(MysqlHBaseMapper.class);
            job.setMapOutputKeyClass(NovelDetail.class);
            job.setMapOutputValueClass(NullWritable.class);

            // reduce and output: store the data in the HBase table novel_detail
            TableMapReduceUtil.initTableReducerJob("novel_detail", MysqlHBaseReducer.class, job);

            // Read the input from the database
            job.setInputFormatClass(DBInputFormat.class);

            // job, the DBWritable class, table name, query conditions, order-by field, and the fields to read
            DBInputFormat.setInput(job, NovelDetail.class,
                    "novel_detail", null, "id", "id",
                    "author_name", "novel_name", "chapter_name", "chapter_url");

            job.waitForCompletion(true);

            return 0;
        }
    }
}
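
The driver can be run directly from the IDE; alternatively, package the project into a jar and submit it with yarn (a sketch: the jar name below is an assumption, and the inner driver class name must be quoted because of the $ in it):

yarn jar data-migrate.jar 'com.data.migrate.MysqlHBaseMapReduce$MysqlHBaseDriver'

When the job finishes it prints its counters: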
21/02/04 14:32:37 INFO mapreduce.Job: Counters: 35
	File System Counters
		FILE: Number of bytes read=7070394052
		FILE: Number of bytes written=8471690289
		FILE: Number of read operations=0
		FILE: Number of large read operations=0
		FILE: Number of write operations=0
		HDFS: Number of bytes read=0
		HDFS: Number of bytes written=0
		HDFS: Number of read operations=0
		HDFS: Number of large read operations=0
		HDFS: Number of write operations=0
	Map-Reduce Framework
		Map input records=10409794
		Map output records=10409794
		Map output bytes=1373170312
		Map output materialized bytes=1400187855
		Input split bytes=78
		Combine input records=0
		Combine output records=0
		Reduce input groups=10409794
		Reduce shuffle bytes=1400187855
		Reduce input records=10409794
		Reduce output records=10409794
		Spilled Records=36609312
		Shuffled Maps =1
		Failed Shuffles=0
		Merged Map outputs=1
		GC time elapsed (ms)=3513
		Total committed heap usage (bytes)=632291328
	Shuffle Errors
		BAD_ID=0
		CONNECTION=0
		IO_ERROR=0
		WRONG_LENGTH=0
		WRONG_MAP=0
		WRONG_REDUCE=0
	File Input Format Counters 
		Bytes Read=0
	File Output Format Counters 
		Bytes Written=0

Process finished with exit code 0

Checking the data in HBase shows that the import succeeded:

count 'novel_detail'
10409794 row(s) in 370.3170 seconds

=> 10409794
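
To spot-check the column layout, you can also scan a single row in the hbase shell:

scan 'novel_detail', {LIMIT => 1}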

