Copying data between different clusters and fetching the latest file

#!/bin/bash
###################
. /etc/profile
. ~/.bash_profile
##################

echo =====================================`date`===============================================

SCRIPT_NAME=$(readlink -f "$0")
hostname=`hostname`
dir=`dirname ${SCRIPT_NAME}`
cd $dir

if [[ -z $1 ]]
then
  ptime=`date -d "-2 hour" +%Y%m%d%H`
else
  ptime=$1
fi
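# Usage (the script file name is illustrative):
#   sh its_distcp.sh              # defaults to two hours ago
#   sh its_distcp.sh 2018082510   # explicit yyyymmddHH to backfill a given hour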
set -x
echo $ptime
# Split the hour string into its date (yyyymmdd) and hour parts; ph, idate and the
# *_ago dates are derived here but not referenced later in this script
pdate=`echo $ptime|awk '{print substr($1,1,8)}'`
ph=`echo $ptime|awk '{print substr($1,9,2)}'`
idate=`echo $ptime|awk '{print substr($1,1,8)}'`
date1ago=`date -d "$pdate -1 day" +%Y%m%d`
date30ago=`date -d "$pdate -30 day" +%Y%m%d`
date90ago=`date -d "$pdate -90 day" +%Y%m%d`

echo $pdate
# Probe the primary NameNode; if it is unreachable, fall back to the secondary address
remote_hdfs="hdfs://nn.its.com:8020/"
hadoop fs -ls hdfs://nn.its.com:8020/ > /dev/null 2>&1
if [ $? -ne 0 ];then
  remote_hdfs="hdfs://rm.its.com:8020/"
fi
# Pick the newest its.txt.* file under the remote recommendation directory
# (the ls output is sorted by file name, so tail -1 takes the last one)
remote_file=`hadoop fs -ls ${remote_hdfs}/user/personal_recommend/ | awk '{print $8}' | grep its.txt. | tail -1`
echo $remote_file
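# If the file names do not sort chronologically, the newest file could instead be
# chosen by modification time (columns 6-7 of the listing), e.g.:
#   remote_file=`hadoop fs -ls ${remote_hdfs}/user/personal_recommend/ | grep its.txt. | sort -k6,7 | tail -1 | awk '{print $8}'`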

# Remove any existing data for the target day so reruns stay idempotent
hadoop fs -rm -r -f /user/client/warehouse/recommend/r_its/p_day=$pdate/

# Refresh the Hive partition metadata: drop the stale definition, then re-register it
hive -e "
alter table recommend.r_its drop if exists partition (p_day=$pdate);
alter table recommend.r_its add if not exists partition (p_day=$pdate);"
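# Optional check (sketch): confirm the partition is registered in the metastore, e.g.:
#   hive -e "show partitions recommend.r_its;" | grep p_day=$pdate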


# Pull the latest remote file into the local partition directory:
#   -m 25 caps the job at 25 map tasks, -bandwidth 10 limits each map to ~10 MB/s,
#   -update only copies files missing or changed at the destination, and
#   -skipcrccheck skips the source/target CRC comparison
hadoop distcp -Dmapred.speculative.execution=false -Ddfs.replication=3 -bandwidth 10 -m 25 -update -skipcrccheck $remote_file /user/client/warehouse/recommend/r_its/p_day=$pdate/
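The script never checks whether the copy actually succeeded. Below is a minimal follow-up sketch, assuming it is placed directly after the distcp command so that $? still holds its exit status:

# Abort on a failed copy, then report how much data landed in the partition
if [ $? -ne 0 ]; then
  echo "distcp of $remote_file to p_day=$pdate failed"
  exit 1
fi
hadoop fs -count /user/client/warehouse/recommend/r_its/p_day=$pdate/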


 


Reposted from blog.csdn.net/yisun123456/article/details/82220325