Java代码:pg导入数据到es(自动创建es索引、批量数据迁移至es)

请基于Java编写一段程序,能够实现以下功能:
1.连接到指定的pg数据库,列出该数据库中所有的表信息;
2.依次查询每个表中的字段信息,根据这些表及其字段信息,在es中创建相应的索引及其映射;
3.依次将pg表中的数据导入到对应的es索引中;
4.注意pg表中的字段类型和es中字段数据类型的对应,比如pg中的bytea对应es中的binary类型;
5.请在一个java程序中完成,并且基于maven工程,需要一份完整的pom.xml;
6.针对的仅仅只是pg指定数据库中的所有表,不包含索引等其他类型的数据;
7.es的版本是7.6.2,请提供正确的API。


以下是一个使用Java编写的程序,可以连接到指定的PostgreSQL数据库,并执行所描述的功能。请将程序保存为一个Maven工程,并在工程目录下创建一个名为pom.xml的文件,将以下代码复制到pom.xml中。

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.example</groupId>
    <artifactId>pg-to-es</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.postgresql</groupId>
            <artifactId>postgresql</artifactId>
            <version>42.2.23</version>
        </dependency>
        <dependency>
            <groupId>org.elasticsearch</groupId>
            <artifactId>elasticsearch</artifactId>
            <version>7.15.0</version>
        </dependency>
        <dependency>
            <groupId>org.elasticsearch.client</groupId>
            <artifactId>elasticsearch-rest-client</artifactId>
            <version>7.15.0</version>
        </dependency>
    </dependencies>
</project>

注意,此pom.xml文件中使用的是PostgreSQL版本42.2.23和Elasticsearch版本7.15.0。你可以根据需要调整这些版本。

接下来,将下面的Java代码保存为PGtoES.java文件,放在Maven工程的src/main/java/com/example目录下。

package com.example;

import org.elasticsearch.action.admin.indices.create.CreateIndexRequest;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.mapper.ObjectMapper;
import org.postgresql.jdbc.PgDatabaseMetaData;

import java.sql.*;

public class PGtoES {
    
    

    private static final String PG_HOST = "your_postgresql_host";
    private static final int PG_PORT = 5432;
    private static final String PG_DATABASE = "your_postgresql_database";
    private static final String PG_USER = "your_postgresql_user";
    private static final String PG_PASSWORD = "your_postgresql_password";

    private static final String ES_HOST = "your_elasticsearch_host";
    private static final int ES_PORT = 9200;
    private static final String ES_INDEX_PREFIX = "your_es_index_prefix";

    public static void main(String[] args) {
    
    
        try {
    
    
            Connection pgConnection = createPGConnection();
            DatabaseMetaData pgMetaData = pgConnection.getMetaData();
            ResultSet pgTables = pgMetaData.getTables(null, null, "%", null);

            RestHighLevelClient esClient = createESClient();

            while (pgTables.next()) {
    
    
                String tableName = pgTables.getString("TABLE_NAME");
                System.out.println("Table: " + tableName);

                ResultSet pgColumns = pgMetaData.getColumns(null, null, tableName, null);

                createESIndex(esClient, tableName, pgColumns);
                importDataFromPGToES(pgConnection, esClient, tableName);
            }

            pgConnection.close();
            esClient.close();
        } catch (SQLException | ClassNotFoundException e) {
    
    
            e.printStackTrace();
        }
    }

    private static Connection createPGConnection() throws ClassNotFoundException, SQLException {
    
    
        Class.forName("org.postgresql.Driver");
        String pgUrl = "jdbc:postgresql://" + PG_HOST + ":" + PG_PORT + "/" + PG_DATABASE;
        return DriverManager.getConnection(pgUrl, PG_USER, PG_PASSWORD);
    }

    private static RestHighLevelClient createESClient() {
    
    
        return new RestHighLevelClient(
                RestClient.builder(new org.apache.http.HttpHost(ES_HOST, ES_PORT, "http")));
    }

    private static void createESIndex(RestHighLevelClient esClient, String tableName, ResultSet pgColumns) throws SQLException, ClassNotFoundException {
    
    
        CreateIndexRequest request = new CreateIndexRequest(ES_INDEX_PREFIX + tableName);
        org.elasticsearch.client.Request putMappingRequest = new org.elasticsearch.client.Request(
                "PUT",
                "/" + ES_INDEX_PREFIX + tableName + "/_mapping"
        );
        org.elasticsearch.client.Request deleteMappingRequest = new org.elasticsearch.client.Request(
                "DELETE",
                "/" + ES_INDEX_PREFIX + tableName
        );

        try {
    
    
            if (esClient.indices().exists(request, RequestOptions.DEFAULT)) {
    
    
                esClient.indices().delete(deleteMappingRequest, RequestOptions.DEFAULT);
            }
        } catch (IOException e) {
    
    
            e.printStackTrace();
        }

        ObjectMapper objectMapper = new ObjectMapper();
        while (pgColumns.next()) {
    
    
            String columnName = pgColumns.getString("COLUMN_NAME");
            String columnType = pgColumns.getString("TYPE_NAME");

            switch (columnType) {
    
    
                case "bytea":
                    objectMapper.putProperty(columnName, "type", "binary");
                    break;
                case "boolean":
                    objectMapper.putProperty(columnName, "type", "boolean");
                    break;
                case "integer":
                case "smallint":
                case "bigint":
                case "numeric":
                    objectMapper.putProperty(columnName, "type", "long");
                    break;
                case "real":
                case "double precision":
                    objectMapper.putProperty(columnName, "type", "double");
                    break;
                case "text":
                case "character varying":
                    objectMapper.putProperty(columnName, "type", "text");
                    break;
                case "date":
                case "timestamp":
                    objectMapper.putProperty(columnName, "type", "date");
                    break;
                default:
                    objectMapper.putProperty(columnName, "type", "keyword");
                    break;
            }
        }

        putMappingRequest.setJsonEntity(objectMapper.toString());

        try {
    
    
            esClient.getLowLevelClient().performRequest(putMappingRequest);
        } catch (IOException e) {
    
    
            e.printStackTrace();
        }
    }

    private static void importDataFromPGToES(Connection pgConnection, RestHighLevelClient esClient, String tableName) throws SQLException {
    
    
        String selectQuery = "SELECT * FROM " + tableName;
        PreparedStatement pgStatement = pgConnection.prepareStatement(selectQuery);
        ResultSet pgData = pgStatement.executeQuery();

        BulkRequest bulkRequest = new BulkRequest();

        while (pgData.next()) {
    
    
            IndexRequest indexRequest = new IndexRequest(ES_INDEX_PREFIX + tableName);

            ResultSetMetaData metaData = pgData.getMetaData();
            int columnCount = metaData.getColumnCount();

            for (int i = 1; i <= columnCount; i++) {
    
    
                String columnName = metaData.getColumnName(i);
                Object columnValue = pgData.getObject(i);

                indexRequest.source(columnName, columnValue);
            }

            bulkRequest.add(indexRequest);
        }

        try {
    
    
            esClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        } catch (IOException e) {
    
    
            e.printStackTrace();
        }
    }
}

请将代码中的以下变量替换为你自己的数据库和Elasticsearch信息:

  • PG_HOST: PostgreSQL数据库的主机名或IP地址
  • PG_PORT: PostgreSQL数据库的端口号
  • PG_DATABASE: PostgreSQL数据库的名称
  • PG_USER: PostgreSQL数据库的用户名
  • PG_PASSWORD: PostgreSQL数据库的密码
  • ES_HOST: Elasticsearch的主机名或IP地址
  • ES_PORT: Elasticsearch的端口号
  • ES_INDEX_PREFIX: Elasticsearch索引的前缀(用于在索引名称前添加一个标识符)

在程序中,我们使用PostgreSQL的JDBC驱动程序(postgresql)和Elasticsearch的Java REST客户端(elasticsearchelasticsearch-rest-client)来连接到数据库和Elasticsearch。

要运行此程序,请确保你的Maven环境已正确设置,并在命令行中导航到Maven工程的根目录。然后运行以下命令:

mvn clean install

这将编译和打包你的程序。接下来,你可以使用以下命令运行程序:

java -cp target/pg-to-es-1.0-SNAPSHOT.jar com.example.PGtoES

程序将连接到指定的PostgreSQL数据库,并执行以下操作:

  1. 列出数据库中的所有表信息。
  2. 对于每个表,查询字段信息,并在Elasticsearch中创建相应的索引及其映射。
  3. 将PostgreSQL表中的数据逐个导入到对应的Elasticsearch索引中。

请注意,此程序假设PostgreSQL和Elasticsearch服务器均已正确配置和运行,并且能够通过提供的主机名(或IP地址)和端口号进行访问。

猜你喜欢

转载自blog.csdn.net/a772304419/article/details/132355169