#!/bin/bash
#
# Periodically samples the per-core system load, the CPU usage and the
# physical memory usage of this host, prints the readings, and calls
# send_mail whenever a reading exceeds its warning threshold.
#
# Requires: bc, awk, top, free, and ifconfig (hostname -I is used as a
# fallback for the IP address). Linux only: reads /proc/cpuinfo and
# /proc/loadavg.
#
# `set -e` is deliberately not enabled: one failed sample must not stop
# the monitoring loop.

# --- Host identification ----------------------------------------------------
# ENO1: name of the first interface listed by ifconfig.
# IP:   IPv4 address of that interface (used in the alert texts).
ENO1=''
IP=''
if command -v ifconfig >/dev/null 2>&1; then
  # Newer net-tools print "eno1: flags=..." (trailing colon); older ones
  # print "eth0      Link encap:...". Strip the colon if present.
  ENO1=$(ifconfig | awk 'NR == 1 { sub(/:$/, "", $1); print $1 }')
  if [[ -n "${ENO1}" ]]; then
    # Older format: "inet addr:192.168.1.10"; newer: "inet 192.168.1.10".
    # The trailing space in the pattern keeps "inet6" lines out.
    IP=$(ifconfig "${ENO1}" \
      | awk '/inet / { sub(/^[a-z]+:/, "", $2); print $2; exit }')
  fi
fi
if [[ -z "${IP}" ]] && command -v hostname >/dev/null 2>&1; then
  # -I is not supported by every hostname implementation; ignore its error.
  IP=$(hostname -I 2>/dev/null | awk '{ print $1 }')
fi
echo "eno: ${ENO1}"
echo "IP: ${IP}"

# --- Warning thresholds -----------------------------------------------------
readonly WARN_LOAD=1.1   # 15-minute load average per core
readonly WARN_CPU=0.95   # CPU usage as a fraction of 1
readonly WARN_MEM=0.93   # used/total physical memory as a fraction of 1

# Seconds to wait between two sampling rounds.
readonly INTERVAL_SECONDS=5

# --- Latest readings (written by the *_func functions) ----------------------
CPU=0.0
LOAD1=0.0
LOAD5=0.0
LOAD15=0.0
MEM=0.0

# Number of the next report printed by print_info.
CNT=1

#######################################
# Placeholder for the real mail delivery: only echoes what would be sent.
# Arguments: $1 - alert message text
#            $2 - alert title
# Outputs:   one line on stdout
#######################################
send_mail() {
  printf 'send mail  %s %s\n' "${1}" "${2}"
}

#######################################
# Tells whether the argument is a plain non-negative decimal number.
# Arguments: $1 - candidate string
# Returns:   0 if it is a number, 1 otherwise
#######################################
is_number() {
  [[ "$1" =~ ^[0-9]+([.][0-9]+)?$ ]]
}

#######################################
# Prints $1 / $2 truncated to two decimals, with a leading zero for
# results below 1 (bc itself prints ".05").
# Arguments: $1 - numerator, $2 - denominator
# Outputs:   the quotient on stdout
# Returns:   1 on non-numeric input, zero denominator or bc failure
#######################################
ratio() {
  local quotient
  if ! is_number "$1" || ! is_number "$2" || [[ "$2" =~ ^0+([.]0+)?$ ]]; then
    return 1
  fi
  quotient=$(echo "scale=2; $1 / $2" | bc) || return 1
  case "${quotient}" in
    '') return 1 ;;
    .*) quotient="0${quotient}" ;;
  esac
  printf '%s\n' "${quotient}"
}

#######################################
# Compares two decimal numbers with bc.
# Arguments: $1 - value, $2 - threshold
# Returns:   0 if value > threshold, 1 otherwise (also on invalid input)
#######################################
exceeds() {
  if ! is_number "$1" || ! is_number "$2"; then
    return 1
  fi
  [[ "$(echo "$1 > $2" | bc)" == 1 ]]
}

#######################################
# 1. Samples the 1/5/15-minute load averages, divides them by the number
# of CPUs and alerts when the 15-minute value is above WARN_LOAD.
# Globals:   LOAD1, LOAD5, LOAD15 (written); WARN_LOAD, IP (read)
# Returns:   1 if the load could not be determined
#######################################
system_load_func() {
  local cpu_num load_1 load_5 load_15 per_core_1 per_core_5 per_core_15

  # "model name" is missing from /proc/cpuinfo on some architectures;
  # "processor" entries exist per logical CPU. Fall back to 1 so the
  # division below can never be by zero.
  cpu_num=$(grep -c '^processor' /proc/cpuinfo)
  if ! [[ "${cpu_num}" =~ ^[1-9][0-9]*$ ]]; then
    cpu_num=1
  fi

  # /proc/loadavg is locale-independent, unlike the text printed by uptime.
  if ! read -r load_1 load_5 load_15 _ < /proc/loadavg; then
    echo 'system_load_func: cannot read /proc/loadavg' >&2
    return 1
  fi

  if ! per_core_1=$(ratio "${load_1}" "${cpu_num}") \
    || ! per_core_5=$(ratio "${load_5}" "${cpu_num}") \
    || ! per_core_15=$(ratio "${load_15}" "${cpu_num}"); then
    echo 'system_load_func: cannot compute the per-core load' >&2
    return 1
  fi
  LOAD1=${per_core_1}
  LOAD5=${per_core_5}
  LOAD15=${per_core_15}

  if exceeds "${LOAD15}" "${WARN_LOAD}"; then
    send_mail "${IP}服务器15分钟的系统平均负载为${LOAD15}, 超过警戒值${WARN_LOAD}, 请立即处理!" "$IP 服务器系统负载告警!"
  fi
}

#######################################
# 2. Samples the CPU usage with top and alerts when it is above WARN_CPU.
# Globals:   CPU (written); WARN_CPU, IP (read)
# Returns:   1 if the idle percentage could not be determined
#######################################
cpu_func() {
  local cpu_idle usage

  # LC_ALL=C keeps "." as the decimal separator, both for the -d argument
  # and for the printed figures. The idle figure is located by its "id"
  # label instead of by column number, because top prints "ni,100.0 id"
  # without a separating blank when the CPU is fully idle. The last
  # summary line wins: the first top iteration reports since-boot values.
  cpu_idle=$(LC_ALL=C top -b -d 0.1 -n 2 | awk -F ',' '
    /^%?Cpu/ {
      for (i = 1; i <= NF; i++) {
        if ($i ~ /[0-9.]+[ %]*id/) {
          idle = $i
          gsub(/[^0-9.]/, "", idle)
        }
      }
    }
    END { print idle }')

  # Only the integer part of the idle percentage is used.
  cpu_idle=${cpu_idle%%.*}
  if ! [[ "${cpu_idle}" =~ ^[0-9]+$ ]]; then
    echo 'cpu_func: cannot parse the idle percentage from top' >&2
    return 1
  fi

  # 10# forces base 10 so a value with a leading zero is not read as octal.
  if ! usage=$(ratio "$(( 100 - 10#${cpu_idle} ))" 100); then
    echo 'cpu_func: cannot compute the CPU usage' >&2
    return 1
  fi
  CPU=${usage}

  if exceeds "${CPU}" "${WARN_CPU}"; then
    send_mail "${IP}服务器cpu使用${CPU},请及时处理." "$IP 服务器CPU告警"
  fi
}

#######################################
# 3. Samples the physical memory usage from the second line of `free -m`
# and alerts when used/total is above WARN_MEM.
# Globals:   MEM (written); WARN_MEM, IP (read)
# Returns:   1 if the output of free could not be parsed
#######################################
mem_func() {
  local mem_total mem_used usage

  # Line 2 of `free -m` is the physical memory row: label, total, used, ...
  read -r mem_total mem_used < <(free -m | awk 'NR == 2 { print $2, $3 }')
  if ! [[ "${mem_total}" =~ ^[0-9]+$ && "${mem_used}" =~ ^[0-9]+$ ]]; then
    echo 'mem_func: cannot parse the output of free' >&2
    return 1
  fi

  # Nothing to report (MEM keeps its previous value) when no memory is
  # reported as used; a zero total would make the ratio undefined.
  if (( 10#${mem_used} == 0 || 10#${mem_total} == 0 )); then
    return 0
  fi

  if ! usage=$(ratio "${mem_used}" "${mem_total}"); then
    echo 'mem_func: cannot compute the memory usage' >&2
    return 1
  fi
  MEM=${usage}

  if exceeds "${MEM}" "${WARN_MEM}"; then
    send_mail "${IP}服务器物理内存已使用 ${MEM},请及时处理." "$IP 服务器内存告警"
  fi
}

#######################################
# Prints the latest readings as a numbered report and advances the counter.
# Globals:   CNT (read and incremented); CPU, LOAD1, LOAD5, LOAD15, MEM (read)
# Outputs:   the report on stdout
#######################################
print_info() {
  echo "# # # # # # # # # # # [${CNT}] # # # # # # # # # # # # # #"
  printf 'cpu: %s\n' "${CPU}"
  printf 'load1: %s\n' "${LOAD1}"
  printf 'load5: %s\n' "${LOAD5}"
  printf 'load15: %s\n' "${LOAD15}"
  printf 'mem: %s\n' "${MEM}"
  echo "# # # # # # # # # # # # # # # # # # # # # # # # # # #"
  CNT=$(( CNT + 1 ))
}

# --- Main loop: sample, report, wait -----------------------------------------
while true; do
  system_load_func
  cpu_func
  mem_func
  print_info
  sleep "${INTERVAL_SECONDS}"
done
注: 运行该脚本需要先安装 bc 命令:
sudo apt-get install bc
这个脚本里只保留了采集各项监控数据的部分, 发送邮件和把数据写入数据库的部分都省略掉了. 发送邮件我实际使用的是 Python, 由该 shell 脚本调用 Python 程序来发送 (因为我发现用 Linux 自带的邮件工具发送会出现很多垃圾邮件, 怎么解决我没有去研究). 采集到的数据我实际会写入数据库保存, 这里为了通用性, 把写入数据库的部分省略掉了.