Hadoop Source Code Analysis (1): The HDFS Startup Scripts

HDFS Startup Scripts

  • start-dfs.sh

    # My Hadoop is installed at /opt/hadoop-2.7.7,
    # so the Hadoop sbin directory is /opt/hadoop-2.7.7/sbin, i.e. $HADOOP_HOME/sbin
    bin=`dirname "${BASH_SOURCE-$0}"`
    # resolve the sbin directory to an absolute path
    bin=`cd "$bin"; pwd`
    # directory holding the helper scripts, e.g. /opt/hadoop-2.7.7/sbin/../libexec
    DEFAULT_LIBEXEC_DIR="$bin"/../libexec
    # /opt/hadoop-2.7.7/sbin/../libexec, unless HADOOP_LIBEXEC_DIR is already set
    HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
    # source hdfs-config.sh from /opt/hadoop-2.7.7/sbin/../libexec
    . $HADOOP_LIBEXEC_DIR/hdfs-config.sh
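
    The dirname/cd/pwd pair and the ${VAR:-default} expansion reappear in almost every one of these scripts, so here is a minimal standalone sketch of the idiom (MY_LIBEXEC_DIR and the echoed labels are purely illustrative):

      #!/usr/bin/env bash
      # Sketch: locate the directory this script lives in, then derive a sibling directory from it.
      bin=`dirname "${BASH_SOURCE-$0}"`              # directory part of the script path (may be relative)
      bin=`cd "$bin"; pwd`                           # normalize it to an absolute path
      libexec=${MY_LIBEXEC_DIR:-"$bin/../libexec"}   # use $MY_LIBEXEC_DIR if set, otherwise the default
      echo "script dir : $bin"
      echo "libexec dir: $libexec"
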
    • hdfs-config.sh

      # `which "$0"` resolves to /opt/hadoop-2.7.7/sbin/start-dfs.sh
      bin=`which "$0"`
      # /opt/hadoop-2.7.7/sbin
      bin=`dirname "${bin}"`
      # resolve to the absolute path /opt/hadoop-2.7.7/sbin
      bin=`cd "$bin"; pwd`
      # /opt/hadoop-2.7.7/sbin/../libexec
      DEFAULT_LIBEXEC_DIR="$bin"/../libexec
      # /opt/hadoop-2.7.7/sbin/../libexec, unless already set
      HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
      # check whether the file (or directory) exists
      if [ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]; then
          # source hadoop-config.sh (the remaining branches keep looking for it elsewhere)
        . ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh
      elif [ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]; then
        . "$HADOOP_COMMON_HOME"/libexec/hadoop-config.sh
      elif [ -e "${HADOOP_HOME}/libexec/hadoop-config.sh" ]; then
        . "$HADOOP_HOME"/libexec/hadoop-config.sh
      else
          # hadoop-config.sh was not found anywhere, so startup fails
        echo "Hadoop common not found."
        exit
      fi
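
      The if/elif chain above is simply "source the first hadoop-config.sh that exists". For illustration, the same lookup written as a loop (a sketch only, mirroring the branches above):

        # Equivalent loop form of the lookup above (sketch only).
        found=false
        for candidate in \
            "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" \
            "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" \
            "${HADOOP_HOME}/libexec/hadoop-config.sh"; do
          if [ -e "$candidate" ]; then
            . "$candidate"    # source the first match and stop looking
            found=true
            break
          fi
        done
        if [ "$found" != "true" ]; then
          echo "Hadoop common not found."
          exit
        fi
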
    • hadoop-config.sh

      # this script's location: /opt/hadoop-2.7.7/sbin/../libexec/hadoop-config.sh
      this="${BASH_SOURCE-$0}"
      # /opt/hadoop-2.7.7/libexec
      common_bin=$(cd -P -- "$(dirname -- "$this")" && pwd -P)
      # script name: hadoop-config.sh
      script="$(basename -- "$this")"
      # absolute path of the script: /opt/hadoop-2.7.7/libexec/hadoop-config.sh
      this="$common_bin/$script"
      # if hadoop-layout.sh exists as a regular file, source it (my install does not have one; to be covered later)
      [ -f "$common_bin/hadoop-layout.sh" ] && . "$common_bin/hadoop-layout.sh"
      
      # 1. set the paths of various Hadoop libraries (not pasted here)
      # 2. determine the Hadoop install root, HADOOP_PREFIX
      # 3. check the arguments
      # 4. set the log level, INFO by default
      # 5. set an alternate conf directory
      # 6. a host can be specified via an argument
      # 7. a hostname can be specified via an argument
      # 8. detect whether we are running under Cygwin
      # 9. make sure we are not running IPv6-only
      # 10. try to set JAVA_HOME if it is not already set (see the sketch below)
      # 11. set up a pile of classpath entries and the conf directory
      # In short, this script establishes the runtime environment.
      
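      As an example of step 10, the JAVA_HOME handling boils down to "respect an existing JAVA_HOME, otherwise try to find a JVM and fail loudly if none turns up". A minimal sketch, assuming a hypothetical default path (the real script probes platform-specific locations):

        # Sketch of a JAVA_HOME fallback check; the default path below is hypothetical.
        if [ -z "$JAVA_HOME" ]; then
          if [ -x /usr/lib/jvm/default-java/bin/java ]; then
            export JAVA_HOME=/usr/lib/jvm/default-java
          else
            echo "Error: JAVA_HOME is not set and no JVM could be found." >&2
            exit 1
          fi
        fi
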
    # back to start-dfs.sh
    
    # get arguments: start-dfs.sh also accepts two options, -upgrade and -rollback
    if [[ $# -ge 1 ]]; then
     startOpt="$1"
     shift
     case "$startOpt" in
       -upgrade)
         nameStartOpt="$startOpt"
       ;;
       -rollback)
         dataStartOpt="$startOpt"
       ;;
       *)
         echo $usage
         exit 1
       ;;
     esac
    fi
    
    #Add other possible options -- for a plain start there are no extra options, so nameStartOpt stays empty
    nameStartOpt="$nameStartOpt $@"
    
    # start the namenodes
    #---------------------------------------------------------
    # namenodes
    
    NAMENODES=$($HADOOP_PREFIX/bin/hdfs getconf -namenodes)
    
    echo "Starting namenodes on [$NAMENODES]"
    
    "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
     --config "$HADOOP_CONF_DIR" \
     --hostnames "$NAMENODES" \
     --script "$bin/hdfs" start namenode $nameStartOpt
    
    
    # start the datanodes
    #---------------------------------------------------------
    # datanodes (using default slaves file)
    
    if [ -n "$HADOOP_SECURE_DN_USER" ]; then
     echo \
       "Attempting to start secure cluster, skipping datanodes. " \
       "Run start-secure-dns.sh as root to complete startup."
    else
     "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
       --config "$HADOOP_CONF_DIR" \
       --script "$bin/hdfs" start datanode $dataStartOpt
    fi
    
    # start the secondary namenodes
    #---------------------------------------------------------
    # secondary namenodes (if any)
    
    SECONDARY_NAMENODES=$($HADOOP_PREFIX/bin/hdfs getconf -secondarynamenodes 2>/dev/null)
    
    if [ -n "$SECONDARY_NAMENODES" ]; then
     echo "Starting secondary namenodes [$SECONDARY_NAMENODES]"
    
     "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
         --config "$HADOOP_CONF_DIR" \
         --hostnames "$SECONDARY_NAMENODES" \
         --script "$bin/hdfs" start secondarynamenode
    fi
    
    # start the journal nodes (used in HA clusters to share the namenodes' edit log)
    #---------------------------------------------------------
    # quorumjournal nodes (if any)
    
    SHARED_EDITS_DIR=$($HADOOP_PREFIX/bin/hdfs getconf -confKey dfs.namenode.shared.edits.dir 2>&-)
    
    case "$SHARED_EDITS_DIR" in
    qjournal://*)
     JOURNAL_NODES=$(echo "$SHARED_EDITS_DIR" | sed 's,qjournal://\([^/]*\)/.*,\1,g; s/;/ /g; s/:[0-9]*//g')
     echo "Starting journal nodes [$JOURNAL_NODES]"
     "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
         --config "$HADOOP_CONF_DIR" \
         --hostnames "$JOURNAL_NODES" \
         --script "$bin/hdfs" start journalnode ;;
    esac
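    # For reference, the sed pipeline above turns a hypothetical HA setting such as
    #   dfs.namenode.shared.edits.dir = qjournal://jn1.example.com:8485;jn2.example.com:8485;jn3.example.com:8485/mycluster
    # into the plain host list "jn1.example.com jn2.example.com jn3.example.com"
    # (strip the qjournal:// prefix and the /cluster suffix, turn ';' into spaces, drop the ports).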
    
    # start the ZKFCs -- used for automatic failover in HA clusters
    #---------------------------------------------------------
    # ZK Failover controllers, if auto-HA is enabled
    AUTOHA_ENABLED=$($HADOOP_PREFIX/bin/hdfs getconf -confKey dfs.ha.automatic-failover.enabled)
    if [ "$(echo "$AUTOHA_ENABLED" | tr A-Z a-z)" = "true" ]; then
     echo "Starting ZK Failover Controllers on NN hosts [$NAMENODES]"
     "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
       --config "$HADOOP_CONF_DIR" \
       --hostnames "$NAMENODES" \
       --script "$bin/hdfs" start zkfc
    fi
    

    Notice that every daemon is ultimately launched through the hadoop-daemons.sh script, which is handed three arguments: --config (the configuration directory), --hostnames (the hosts on which to start the daemon), and --script (the script that actually starts the process). Next, let's look at the hadoop-daemons.sh and hdfs scripts.
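
    To make the parameter passing concrete, here is a hedged example of what the namenode step expands to on a hypothetical HA cluster with two NameNodes (the host names are made up):

      # Hypothetical output of the namenode lookup:
      #   $ hdfs getconf -namenodes
      #   nn1.example.com nn2.example.com
      # start-dfs.sh then effectively runs:
      hadoop-daemons.sh --config "$HADOOP_CONF_DIR" \
        --hostnames "nn1.example.com nn2.example.com" \
        --script "$bin/hdfs" start namenode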

  • hadoop-daemons.sh

    usage="Usage: hadoop-daemons.sh [--config confdir] [--hosts hostlistfile] [start|stop] command args..."
    
    # if no args specified, show usage
    if [ $# -le 1 ]; then
      echo $usage
      exit 1
    fi
    
    bin=`dirname "${BASH_SOURCE-$0}"`
    bin=`cd "$bin"; pwd`
    
    DEFAULT_LIBEXEC_DIR="$bin"/../libexec
    HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
    # source hadoop-config.sh again to refresh the environment
    . $HADOOP_LIBEXEC_DIR/hadoop-config.sh
    # exec slaves.sh, which in turn runs hadoop-daemon.sh (note: daemon, singular) on each host
    exec "$bin/slaves.sh" --config $HADOOP_CONF_DIR cd "$HADOOP_PREFIX" \; "$bin/hadoop-daemon.sh" --config $HADOOP_CONF_DIR "$@"
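
    The escaped \; is the key detail here: it reaches slaves.sh as a literal ';', so on every slave the remote shell first runs cd "$HADOOP_PREFIX" and then hadoop-daemon.sh. A tiny demo of the same trick (assumes passwordless ssh to localhost):

      # The local shell passes the escaped ';' through untouched; the remote shell re-parses it,
      # so two commands run on the remote side: `cd /tmp`, then `pwd`.
      ssh localhost cd /tmp \; pwd     # prints: /tmp
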
    • slaves.sh

      # key excerpt: iterate over the slave nodes and ssh into each one to run hadoop-daemon.sh
      # Where to start the script, see hadoop-config.sh
      # (it set up the variables based on command line options)
      if [ "$HADOOP_SLAVE_NAMES" != '' ] ; then
        SLAVE_NAMES=$HADOOP_SLAVE_NAMES
      else
        SLAVE_FILE=${HADOOP_SLAVES:-${HADOOP_CONF_DIR}/slaves}
        SLAVE_NAMES=$(cat "$SLAVE_FILE" | sed  's/#.*$//;/^$/d')
      fi
      
      # start the daemons
      for slave in $SLAVE_NAMES ; do
       ssh $HADOOP_SSH_OPTS $slave $"${@// /\\ }" \
         2>&1 | sed "s/^/$slave: /" &    # prefix the remote output with the slave's host name
      done
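
      The odd-looking $"${@// /\\ }" expansion backslash-escapes every space inside each argument, so arguments that contain spaces survive being re-parsed by the remote shell. A quick illustration (bash only; the option value is made up):

        # Each positional parameter has its internal spaces escaped before being sent over ssh.
        set -- start datanode "-Dmy.opt=a b"     # hypothetical arguments
        printf '%s\n' "${@// /\\ }"
        # prints:
        #   start
        #   datanode
        #   -Dmy.opt=a\ b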
    • hadoop-daemon.sh

      # part 1: work out which daemon is being operated on; when starting a namenode, command ends up being "namenode"
      hadoopScript="$HADOOP_PREFIX"/bin/hadoop
      if [ "--script" = "$1" ]
        then
          shift
          hadoopScript=$1
          shift
      fi
      startStop=$1
      shift
      command=$1
      shift
      
      # the middle section (omitted) sets up the log path, the pid file path, and so on
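      # (Roughly: log=$HADOOP_LOG_DIR/hadoop-$HADOOP_IDENT_STRING-$command-$HOSTNAME.out and
      #  pid=$HADOOP_PID_DIR/hadoop-$HADOOP_IDENT_STRING-$command.pid -- see the full script for the exact defaults.)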
      
      # part 3
      
      case $startStop in
        # start the process
        (start)
          # make sure the pid directory exists (create it if necessary)
          [ -w "$HADOOP_PID_DIR" ] ||  mkdir -p "$HADOOP_PID_DIR"
      
          if [ -f $pid ]; then
            if kill -0 `cat $pid` > /dev/null 2>&1; then
              echo $command running as process `cat $pid`.  Stop it first.
              exit 1
            fi
          fi
      
          if [ "$HADOOP_MASTER" != "" ]; then
            echo rsync from $HADOOP_MASTER
            rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' $HADOOP_MASTER/ "$HADOOP_PREFIX"
          fi
      
          hadoop_rotate_log $log
          echo starting $command, logging to $log
          cd "$HADOOP_PREFIX"
          case $command in
            namenode|secondarynamenode|datanode|journalnode|dfs|dfsadmin|fsck|balancer|zkfc)
              if [ -z "$HADOOP_HDFS_HOME" ]; then
                hdfsScript="$HADOOP_PREFIX"/bin/hdfs
              else
                hdfsScript="$HADOOP_HDFS_HOME"/bin/hdfs
              fi
      
              ####  adjust the scheduling priority (nice) and run the hdfs command; when starting a namenode,
              ####  $command = namenode and $@ is empty, so this is effectively `hdfs namenode`
              nohup nice -n $HADOOP_NICENESS $hdfsScript --config $HADOOP_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null &
            ;;
          (*)
              nohup nice -n $HADOOP_NICENESS $hadoopScript --config $HADOOP_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null &
            ;;
          esac
          echo $! > $pid
          sleep 1
          head "$log"
          # capture the ulimit output
          if [ "true" = "$starting_secure_dn" ]; then
            echo "ulimit -a for secure datanode user $HADOOP_SECURE_DN_USER" >> $log
            # capture the ulimit info for the appropriate user
            su --shell=/bin/bash $HADOOP_SECURE_DN_USER -c 'ulimit -a' >> $log 2>&1
          elif [ "true" = "$starting_privileged_nfs" ]; then
              echo "ulimit -a for privileged nfs user $HADOOP_PRIVILEGED_NFS_USER" >> $log
              su --shell=/bin/bash $HADOOP_PRIVILEGED_NFS_USER -c 'ulimit -a' >> $log 2>&1
          else
            echo "ulimit -a for user $USER" >> $log
            ulimit -a >> $log 2>&1
          fi
          sleep 3;
          if ! ps -p $! > /dev/null ; then
            exit 1
          fi
          ;;
        # stop the process
        (stop)
          if [ -f $pid ]; then
            TARGET_PID=`cat $pid`
            if kill -0 $TARGET_PID > /dev/null 2>&1; then
              echo stopping $command
              kill $TARGET_PID
              sleep $HADOOP_STOP_TIMEOUT
              if kill -0 $TARGET_PID > /dev/null 2>&1; then
                echo "$command did not stop gracefully after $HADOOP_STOP_TIMEOUT seconds: killing with kill -9"
                kill -9 $TARGET_PID
              fi
            else
              echo no $command to stop
            fi
            rm -f $pid
          else
            echo no $command to stop
          fi
          ;;
      
        (*)
          echo $usage
          exit 1
          ;;
      esac
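
      The start branch is a classic shell daemon-launch pattern: lower the priority with nice, detach with nohup, send stdout/stderr to a log file, record $! in a pid file, and later probe liveness with kill -0. A self-contained sketch (all paths and the dummy command are illustrative):

        log=/tmp/mydaemon.out
        pid=/tmp/mydaemon.pid
        nohup nice -n 0 sleep 300 > "$log" 2>&1 < /dev/null &   # detach and capture output
        echo $! > "$pid"                                        # remember the background pid
        if kill -0 "$(cat "$pid")" > /dev/null 2>&1; then       # signal 0 only tests that the pid is alive
          echo "daemon is running as pid $(cat "$pid")"
        fi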
      
      
  • hdfs

    # excerpt: determine the Java main class to run
    if [ "$COMMAND" = "namenode" ] ; then
      CLASS='org.apache.hadoop.hdfs.server.namenode.NameNode'
      HADOOP_OPTS="$HADOOP_OPTS $HADOOP_NAMENODE_OPTS"
    elif [ "$COMMAND" = "zkfc" ] ; then
      CLASS='org.apache.hadoop.hdfs.tools.DFSZKFailoverController'
      HADOOP_OPTS="$HADOOP_OPTS $HADOOP_ZKFC_OPTS"
    elif [ "$COMMAND" = "secondarynamenode" ] ; then
      CLASS='org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode'
      HADOOP_OPTS="$HADOOP_OPTS $HADOOP_SECONDARYNAMENODE_OPTS"
    elif [ "$COMMAND" = "datanode" ] ; then
      CLASS='org.apache.hadoop.hdfs.server.datanode.DataNode'
      if [ "$starting_secure_dn" = "true" ]; then
        HADOOP_OPTS="$HADOOP_OPTS -jvm server $HADOOP_DATANODE_OPTS"
      else
        HADOOP_OPTS="$HADOOP_OPTS -server $HADOOP_DATANODE_OPTS"
      fi
    elif [ "$COMMAND" = "journalnode" ] ; then
      CLASS='org.apache.hadoop.hdfs.qjournal.server.JournalNode'
      HADOOP_OPTS="$HADOOP_OPTS $HADOOP_JOURNALNODE_OPTS"
    # ... (branches for the remaining commands omitted)
    fi
    
    # run it -- this is where the selected Java main class is finally launched
    exec "$JAVA" -Dproc_$COMMAND $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@"
    
    # Main classes for the common commands:
    # namenode           -> org.apache.hadoop.hdfs.server.namenode.NameNode
    # datanode           -> org.apache.hadoop.hdfs.server.datanode.DataNode
    # secondarynamenode  -> org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode
    # version            -> org.apache.hadoop.util.VersionInfo
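
    Putting it together, for `hdfs namenode` the exec above becomes a plain java invocation roughly like the following (the java path, heap size and options are whatever hadoop-config.sh computed; the values shown are only examples):

      exec /usr/lib/jvm/java-8/bin/java \
        -Dproc_namenode \
        -Xmx1000m \
        $HADOOP_OPTS \
        org.apache.hadoop.hdfs.server.namenode.NameNode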

Summary

HDFS startup begins with start-dfs.sh. It first sources the environment-setup scripts hdfs-config.sh and hadoop-config.sh, and then launches everything through hadoop-daemons.sh. hadoop-daemons.sh calls slaves.sh, which ssh-es into every node and runs hadoop-daemon.sh there. hadoop-daemon.sh handles the process bookkeeping (pid file, logs), then calls the hdfs script, which determines the concrete operation and the Java main class and finally executes it. The next step is to look at what the NameNode class actually does.
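
The call chain traced above, in one picture:

    start-dfs.sh
      -> hdfs-config.sh -> hadoop-config.sh      (set up the runtime environment)
      -> hadoop-daemons.sh                       (per-role launcher)
           -> slaves.sh                          (ssh to every target host)
                -> hadoop-daemon.sh              (pid/log bookkeeping on each host)
                     -> hdfs <command>           (picks the Java main class)
                          -> java ... org.apache.hadoop.hdfs.server.namenode.NameNode (and friends)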

Reprinted from www.cnblogs.com/roadzhao/p/12453013.html