一.CMS GC
实时
spark-submit \
--class com.sznongfu.cn.center.StartCenter \
--master spark://$(hostname):7077 \
--executor-memory 1G \
#每个executor分配的cores,这样避免资源争用,分配不合理
--executor-cores 2 \
--total-executor-cores 4 \
--driver-java-options "-XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled -XX:+ParallelRefProcEnabled -XX:+CMSClassUnloadingEnabled -XX:+HeapDumpOnOutOfMemoryError -XX:MaxTenuringThreshold=10 -XX:SurvivorRatio=8" \
--conf "spark.executor.extraJavaOptions= -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled -XX:+ParallelRefProcEnabled -XX:+CMSClassUnloadingEnabled -XX:+HeapDumpOnOutOfMemoryError -XX:MaxTenuringThreshold=10 -XX:SurvivorRatio=8" \
/home/jerry/racoon/nongfu.ngx.log.analyser/modules/LogProcess/out/libs/LogProcess-1.0-SNAPSHOT-all.jar /home/hadoop/properties/hdfs.properties com.sznongfu.cn.online.statistic.NginxLogStatistic
-XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 -XX:+DisableExplicitGC -Djava.awt.headless=true -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false
spark-submit \
--class com.sznongfu.cn.center.StartCenter \
--master spark://$(hostname):7077 \
--executor-memory 1G \
#每个executor分配的cores,这样避免资源争用,分配不合理
--executor-cores 2 \
--total-executor-cores 4 \
--driver-java-options "-XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 -XX:+DisableExplicitGC -Djava.awt.headless=true -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false" \
--conf "spark.executor.extraJavaOptions= -XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 -XX:+DisableExplicitGC -Djava.awt.headless=true -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false" /home/jerry/racoon/nongfu.ngx.log.analyser/modules/LogProcess/out/libs/LogProcess-1.0-SNAPSHOT-all.jar /home/hadoop/properties/hdfs.properties com.sznongfu.cn.online.statistic.NginxLogStatistic
离线:
spark-submit \
--class com.sznongfu.cn.center.StartCenter \
--master spark://$(hostname):7077 \
--executor-memory 1G \
#每个executor分配的cores,这样避免资源争用,分配不合理
--executor-cores 2 \
--total-executor-cores 4 \
--driver-java-options "-XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled -XX:+ParallelRefProcEnabled -XX:+CMSClassUnloadingEnabled -XX:+HeapDumpOnOutOfMemoryError -XX:MaxTenuringThreshold=10 -XX:SurvivorRatio=8" \
--conf "spark.executor.extraJavaOptions= -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled -XX:+ParallelRefProcEnabled -XX:+CMSClassUnloadingEnabled -XX:+HeapDumpOnOutOfMemoryError -XX:MaxTenuringThreshold=10 -XX:SurvivorRatio=8" \
/home/jerry/racoon/nongfu.ngx.log.analyser/modules/LogProcess/out/libs/LogProcess-1.0-SNAPSHOT-all.jar /home/hadoop/properties/hdfs.properties com.sznongfu.cn.offline.statistic.UserTrajectoryStatistic
spark-submit \
--class com.sznongfu.cn.center.StartCenter \
--master spark://$(hostname):7077 \
--executor-memory 1G \
#每个executor分配的cores,这样避免资源争用,分配不合理
--executor-cores 2 \
--total-executor-cores 4 \
--driver-java-options "-XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 -XX:+DisableExplicitGC -Djava.awt.headless=true -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false " \
--conf "spark.executor.extraJavaOptions=-XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 -XX:+DisableExplicitGC -Djava.awt.headless=true -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false " /home/jerry/racoon/nongfu.ngx.log.analyser/modules/LogProcess/out/libs/LogProcess-1.0-SNAPSHOT-all.jar /home/hadoop/properties/hdfs.properties com.sznongfu.cn.offline.statistic.UserTrajectoryStatistic
-XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 -XX:+DisableExplicitGC -Djava.awt.headless=true -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false
################################测试修改
实时:
spark-submit --class com.sznongfu.cn.center.StartCenter --master spark://$(hostname):7077 --executor-memory 1500M --total-executor-cores 2 --driver-java-options "-XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled -XX:+ParallelRefProcEnabled -XX:+CMSClassUnloadingEnabled -XX:+HeapDumpOnOutOfMemoryError -XX:MaxTenuringThreshold=10 -XX:SurvivorRatio=8" --conf "spark.executor.extraJavaOptions= -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled -XX:+ParallelRefProcEnabled -XX:+CMSClassUnloadingEnabled -XX:+HeapDumpOnOutOfMemoryError -XX:MaxTenuringThreshold=10 -XX:SurvivorRatio=8" /home/jerry/racoon/nongfu.ngx.log.analyser/modules/LogProcess/out/libs/LogProcess-1.0-SNAPSHOT-all.jar /home/hadoop/properties/hdfs.properties com.sznongfu.cn.online.statistic.NginxLogStatistic
离线:
spark-submit --class com.sznongfu.cn.center.StartCenter --master spark://$(hostname):7077 --executor-memory 1500M --total-executor-cores 2 --driver-java-options "-XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled -XX:+ParallelRefProcEnabled -XX:+CMSClassUnloadingEnabled -XX:+HeapDumpOnOutOfMemoryError -XX:MaxTenuringThreshold=10 -XX:SurvivorRatio=8" --conf "spark.executor.extraJavaOptions= -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled -XX:+ParallelRefProcEnabled -XX:+CMSClassUnloadingEnabled -XX:+HeapDumpOnOutOfMemoryError -XX:MaxTenuringThreshold=10 -XX:SurvivorRatio=8" /home/jerry/racoon/nongfu.ngx.log.analyser/modules/LogProcess/out/libs/LogProcess-1.0-SNAPSHOT-all.jar /home/hadoop/properties/hdfs.properties com.sznongfu.cn.online.statistic.NginxLogStatistic
二 G1 GC
/* 备用 不可用
spark-submit --class com.sznongfu.cn.center.StartCenter --name sznongfuzhanggui --master spark://hadoop:7077 --executor-memory 1G --total-executor-cores 5 --conf "spark.executor.extraJavaOptions=-XX:+UseG1GC -XX:+PrintFlagsFinal -XX:+PrintReferenceGC -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintAdaptiveSizePolicy -XX:+UnlockDiagnosticVMOptions -XX:+G1SummarizeConcMark -XX:InitiatingHeapOccupancyPercent=35 -XX:ConcGCThreads=20" /home/jerry/racoon/nongfu.ngx.log.analyser/modules/LogProcess/out/libs/LogProcess-1.0-SNAPSHOT-all.jar /home/hadoop/properties/hdfs.properties com.sznongfu.cn.online.statistic.NginxLogStatistic
备用 不可用*/
/* 可用 */
spark-submit --class com.sznongfu.cn.center.StartCenter --name sznongfuzhanggui --master spark://hadoop:7077 --executor-memory 1G --total-executor-cores 5 --conf "spark.executor.extraJavaOptions=-XX:+UseG1GC -XX:+PrintFlagsFinal -XX:+PrintReferenceGC -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintAdaptiveSizePolicy -XX:+UnlockDiagnosticVMOptions -XX:+G1SummarizeConcMark " /home/jerry/racoon/nongfu.ngx.log.analyser/modules/LogProcess/out/libs/LogProcess-1.0-SNAPSHOT-all.jar /home/hadoop/properties/hdfs.properties com.sznongfu.cn.online.statistic.NginxLogStatistic
/* 可用 */
英特尔最佳JVM实践
-XX:+UseG1GC -XX:+PrintFlagsFinal -XX:+PrintReferenceGC -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintAdaptiveSizePolicy -XX:+UnlockDiagnosticVMOptions -XX:+G1SummarizeConcMark -Xms88g -Xmx88g -XX:InitiatingHeapOccupancyPercent=35 -XX:ConcGCThreads=20
kafka GC方式:
00:05:54 /home/sznongfu/opt/jdk-1.8.0/bin/java -Xmx512M -server -XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 -XX:+DisableExplicitGC -Djava.awt.headless=true -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dkafka.logs.dir=/home/sznongfu/opt/kafka_2.11-0.10.0.0/bin/../logs -Dlog4j.configuration=file:/home/sznongfu/opt/kafka_2.11-0.10.0.0/bin/../config/tools-log4j.properties -cp
spark-submit --class com.sznongfu.cn.center.StartCenter --name sznongfuzhanggui --master spark://hadoop:7077 --executor-memory 2G --total-executor-cores 2 --conf "spark.executor.extraJavaOptions=-XX:+PrintGCDetails -XX:+PrintGCTimeStamps" /home/jerry/racoon/nongfu.ngx.log.analyser/modules/LogProcess/out/libs/LogProcess-1.0-SNAPSHOT-all.jar /home/hadoop/properties/hdfs.properties com.sznongfu.cn.offline.statistic.UserTrajectoryStatistic
三.自动化脚本
1.crontab -e
0 3 * * * /start.sh
2.shell
在merchant-benchmarkA上进入 opt/nginx-1.9/logs/下
1.日志存放在merchant-master-logs/里
临时的日志存放到api-logs里
2.查看日志的日期,将需要的日志复制过去就可以
将merchant-master-logs里的 api相关的日志复制到 ./api-logs里
3.日志格式
当天的日志: .log 结尾
昨天04:00~今天04:00: .log.1 结尾
昨天04:00之前的日志: .log.2.gz 结尾
4.传输
通过scp命令将日志传输到本地
4.获取当天数据
scp [email protected]:/home/sznongfu/opt/nginx-1.9/logs/merchant-master-logs/*.api.log.1 .
scp [email protected]:/home/sznongfu/opt/nginx-1.9/logs/merchant-master-logs/*.api.log .
5.获取昨天数据
scp [email protected]:/home/sznongfu/opt/nginx-1.9/logs/merchant-master-logs/*.api.log.1 .
scp [email protected]:/home/sznongfu/opt/nginx-1.9/logs/merchant-master-logs/*.api.log.2.gz .
online:
#######################################
# Submit the online (streaming) Spark job in the background and record its PID.
# Globals (read):  spark_bin_dir, la_data_jar, la_data_conf, online_class_name, la_log
# Globals (written): la_data_pid_file — JSON {"class": ..., "pid": ...}
# Outputs: job stdout/stderr appended to ${la_log}
#######################################
function run_online_spark_submit()
{
  "${spark_bin_dir}/spark-submit" \
  --class com.sznongfu.cn.center.StartCenter \
  --master "spark://$(hostname):7077" \
  --executor-memory 1G \
  --executor-cores 2 \
  --total-executor-cores 2 \
  --driver-class-path /home/sznongfu/opt/spark-2.2.0-bin-hadoop2.7/jars/ \
  "${la_data_jar}" "${la_data_conf}" "${online_class_name}" >> "${la_log}" 2>&1 &
  # $$ is this script's PID; the submitted job is a child of it, so look for a
  # process whose parent column matches $$ and whose command line contains the
  # online class name. (pgrep -P would be simpler but keeps the original logic.)
  local class_ppid=$$
  local class_pid
  # Split declaration from assignment so the pipeline's exit status is not masked.
  class_pid=$(ps -ef | grep -w "${class_ppid}" | grep "${online_class_name}" | grep -v grep | awk '{print $2}')
  echo "{\"class\":\"$online_class_name\", \"pid\":\"${class_pid}\"}" > "${la_data_pid_file}"
}
offline:
#######################################
# Submit the offline (batch) Spark job in the background.
# Globals (read): spark_bin_dir, la_data_jar, la_data_conf, offline_class_name,
#                 la_offline_log
# Outputs: job stdout/stderr appended to ${la_offline_log}
# NOTE(review): unlike run_online_spark_submit, this records no PID file —
# presumably intentional for a fire-and-forget batch job; confirm with callers.
#######################################
function run_offline_spark_submit()
{
  "${spark_bin_dir}/spark-submit" \
  --class com.sznongfu.cn.center.StartCenter \
  --master "spark://$(hostname):7077" \
  --executor-memory 1G \
  --executor-cores 2 \
  --total-executor-cores 2 \
  --driver-class-path /home/sznongfu/opt/spark-2.2.0-bin-hadoop2.7/jars/ \
  "${la_data_jar}" "${la_data_conf}" "${offline_class_name}" >> "${la_offline_log}" 2>&1 &
}
spark-submit 无法启动
1、WARN TaskSchedulerImpl: Initial job has not accepted any resources; check your cluster UI to ensure that workers are registered and have sufficient memory
当前的集群的可用资源不能满足应用程序所请求的资源。
资源分2类: cores 和 ram
Core代表对执行可用的executor slots
Ram代表每个Worker上被需要的空闲内存来运行你的Application。
解决方法:
应用请求的资源不要多于当前空闲可用的资源
关闭掉已经执行结束的Application
2
2. worker挂掉或假死
有时候我们还会在web ui中看到worker节点消失或处于dead状态,在该节点运行的任务则会报各种 lost worker 的错误,引发原因和上述大体相同,worker内存中保存了大量的ui信息导致gc时失去和master之间的心跳。
解决
增加Master的内存占用,在Worker节点spark-env.sh 中设置:
export SPARK_DAEMON_MEMORY=2g # 根据你的实际情况
2worker假死: 减少保存在Worker内存中的Driver,Executor信息
spark.worker.ui.retainedExecutors 200 # 默认都是1000
spark.worker.ui.retainedDrivers 200