文章目录
1. 前言
基于阿里云搭建hadoop平台,并实现远程访问调用
需要在安全组开放如下端口:
9000
50010
50070
2. 添加hadoop用户
添加用户,防止误操作搞崩了,密码自己设置。
useradd hadoop
passwd hadoop
usermod -aG wheel hadoop
3. 配置/etc/hosts文件
左边是集群的内网ip,右边是你的主机名
172.31.18.35 k8s-master
172.31.18.36 k8s-node1
4. 设置ssh免密登录
4.1 安装ssh
检测是否安装:
rpm -qa | grep ssh
安装:
yum list | grep ssh
yum install -y openssh-clients
yum install -y openssh-server
4.2 设置免密
ssh-keygen -t rsa -P ''
ssh-copy-id k8s-master
ssh-copy-id k8s-node1
5. 安装JDK
sudo yum update
sudo yum install java-1.8.0-openjdk-devel
6. 安装hadoop
mkdir software
cd software
wget https://archive.apache.org/dist/hadoop/common/hadoop-2.7.7/hadoop-2.7.7.tar.gz
tar xvf hadoop-2.7.7.tar.gz
mv hadoop-2.7.7 hadoop
7. 配置环境变量
找到jdk目录:
update-alternatives --display java
发现:
/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.232.b09-0.el7_7.x86_64/jre
然后修改/etc/profile文件:
vim /etc/profile
编辑保存后执行 source /etc/profile 使配置生效
插入:
#java
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.232.b09-0.el7_7.x86_64/jre
export PATH=$PATH:$JAVA_HOME/bin
#hadoop
export HADOOP_HOME=/home/hadoop/software/hadoop
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib"
export JAVA_LIBRARY_PATH=$HADOOP_HOME/lib/native:$JAVA_LIBRARY_PATH
8. 配置hadoop
8.1 hadoop-env.sh
vim /home/hadoop/software/hadoop/etc/hadoop/hadoop-env.sh
修改JAVA_HOME:
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk
-1.8.0.232.b09-0.el7_7.x86_64/jre
8.2 core-site.xml
vim /home/hadoop/software/hadoop/etc/hadoop/core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://k8s-master:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/home/hadoop/software/hadoop/tmp</value>
</property>
</configuration>
8.3 hdfs-site.xml
vim /home/hadoop/software/hadoop/etc/hadoop/hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
</configuration>
8.4 mapred-site.xml
mv mapred-site.xml.template mapred-site.xml
vim mapred-site.xml
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobtracker.address</name>
<value>yarn</value>
</property>
8.5 yarn-site.xml
vim yarn-site.xml
<configuration>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>k8s-master</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>
8.6 Slaves
vim slaves
添加你的主机名,添加的节点为datanode节点
k8s-master
k8s-node1
9. 格式化namenode
在k8s-master:
hdfs namenode -format
启动hadoop
start-all.sh
10. 远程访问
10.1 配置主机hosts文件
C:\Windows\System32\drivers\etc
【对应的公网ip】 k8s-master
【对应的公网ip】 k8s-node1
10.2 Maven配置
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.7.7</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.7.7</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.7.7</version>
</dependency>
10.3 代码示例
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.IOException;
import java.net.URI;
/**
* @author lhd
*/
public class HdfsDemo {
    /** HDFS NameNode endpoint; replace xx.xxx.xx.xxx with the master's public IP. */
    private static final String HDFS_API = "hdfs://xx.xxx.xx.xxx:9000";
    static Configuration configuration = null;
    static FileSystem fileSystem = null;

    /**
     * Initializes the shared {@link FileSystem} client.
     * Must be called before any other method of this class; on failure the
     * stack trace is printed and {@code fileSystem} stays {@code null}.
     */
    public static void init() {
        // Act as the "hadoop" user that was created on the cluster
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        configuration = new Configuration();
        // Resolve datanodes by hostname instead of their (private) IPs, so the
        // hosts-file mapping to public IPs on the client machine takes effect
        configuration.set("dfs.client.use.datanode.hostname", "true");
        try {
            fileSystem = FileSystem.get(URI.create(HDFS_API), configuration);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates the directory /demo on HDFS (parents included, like mkdir -p).
     */
    public static void createFolder() {
        try {
            Path path = new Path("/demo");
            fileSystem.mkdirs(path);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Recursively prints every file and directory under the given path.
     *
     * @param path HDFS path to start from
     */
    public static void listFile(Path path) {
        try {
            // FileStatus carries the entry's metadata: length, block size, permissions, etc.
            FileStatus[] statuses = fileSystem.listStatus(path);
            for (FileStatus status : statuses) {
                // Print every entry, then descend into sub-directories
                System.out.println("当前路径是:" + status.getPath());
                if (status.isDirectory()) {
                    listFile(status.getPath());
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Uploads the local file e:/a.txt to /demo/a.txt on HDFS.
     */
    public static void uploadFile() {
        try {
            Path src = new Path("e://a.txt");
            Path dest = new Path("/demo/a.txt");
            fileSystem.copyFromLocalFile(src, dest);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Downloads /demo/a.txt from HDFS to the local file e:/b.txt.
     */
    public static void downloadFile() {
        try {
            Path src = new Path("/demo/a.txt");   // source on HDFS
            Path dest = new Path("e://b.txt");    // local destination
            fileSystem.copyToLocalFile(src, dest);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) {
        init();
        createFolder();
        uploadFile();
        downloadFile();
        listFile(new Path("/"));
    }
}