Submitting applications with spark-submit (standalone mode)

1. CMS GC

Streaming (real-time):

# --executor-cores caps the cores each executor gets; paired with
# --total-executor-cores it keeps the allocation even and avoids resource contention.
# Note: no blank lines or comments may sit between the continued lines below,
# or the backslash continuation breaks.
spark-submit \
--class com.sznongfu.cn.center.StartCenter \
--master spark://$(hostname):7077 \
--executor-memory 1G \
--executor-cores 2 \
--total-executor-cores 4 \
--driver-java-options "-XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled -XX:+ParallelRefProcEnabled -XX:+CMSClassUnloadingEnabled -XX:+HeapDumpOnOutOfMemoryError -XX:MaxTenuringThreshold=10 -XX:SurvivorRatio=8" \
--conf "spark.executor.extraJavaOptions=-XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled -XX:+ParallelRefProcEnabled -XX:+CMSClassUnloadingEnabled -XX:+HeapDumpOnOutOfMemoryError -XX:MaxTenuringThreshold=10 -XX:SurvivorRatio=8" \
/home/jerry/racoon/nongfu.ngx.log.analyser/modules/LogProcess/out/libs/LogProcess-1.0-SNAPSHOT-all.jar /home/hadoop/properties/hdfs.properties com.sznongfu.cn.online.statistic.NginxLogStatistic
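Instead of repeating these JVM options on every submit, they could also live once in conf/spark-defaults.conf; a minimal sketch using the standard Spark property names (mirroring them onto the driver is optional):

# conf/spark-defaults.conf -- picked up by every spark-submit from this client
spark.executor.extraJavaOptions  -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled -XX:+ParallelRefProcEnabled -XX:+CMSClassUnloadingEnabled -XX:+HeapDumpOnOutOfMemoryError -XX:MaxTenuringThreshold=10 -XX:SurvivorRatio=8
spark.driver.extraJavaOptions    -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled -XX:+ParallelRefProcEnabled -XX:+CMSClassUnloadingEnabled -XX:+HeapDumpOnOutOfMemoryError -XX:MaxTenuringThreshold=10 -XX:SurvivorRatio=8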



The same streaming job with G1 GC:

spark-submit \
--class com.sznongfu.cn.center.StartCenter \
--master spark://$(hostname):7077 \
--executor-memory 1G \
--executor-cores 2 \
--total-executor-cores 4 \
--driver-java-options "-XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 -XX:+DisableExplicitGC -Djava.awt.headless=true -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false" \
--conf "spark.executor.extraJavaOptions=-XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 -XX:+DisableExplicitGC -Djava.awt.headless=true -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false" \
/home/jerry/racoon/nongfu.ngx.log.analyser/modules/LogProcess/out/libs/LogProcess-1.0-SNAPSHOT-all.jar /home/hadoop/properties/hdfs.properties com.sznongfu.cn.online.statistic.NginxLogStatistic

Offline (batch):

spark-submit \
--class com.sznongfu.cn.center.StartCenter \
--master spark://$(hostname):7077 \
--executor-memory 1G \
--executor-cores 2 \
--total-executor-cores 4 \
--driver-java-options "-XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled -XX:+ParallelRefProcEnabled -XX:+CMSClassUnloadingEnabled -XX:+HeapDumpOnOutOfMemoryError -XX:MaxTenuringThreshold=10 -XX:SurvivorRatio=8" \
--conf "spark.executor.extraJavaOptions=-XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled -XX:+ParallelRefProcEnabled -XX:+CMSClassUnloadingEnabled -XX:+HeapDumpOnOutOfMemoryError -XX:MaxTenuringThreshold=10 -XX:SurvivorRatio=8" \
/home/jerry/racoon/nongfu.ngx.log.analyser/modules/LogProcess/out/libs/LogProcess-1.0-SNAPSHOT-all.jar /home/hadoop/properties/hdfs.properties com.sznongfu.cn.offline.statistic.UserTrajectoryStatistic

The same offline job with G1 GC:

spark-submit \
--class com.sznongfu.cn.center.StartCenter \
--master spark://$(hostname):7077 \
--executor-memory 1G \
--executor-cores 2 \
--total-executor-cores 4 \
--driver-java-options "-XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 -XX:+DisableExplicitGC -Djava.awt.headless=true -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false" \
--conf "spark.executor.extraJavaOptions=-XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 -XX:+DisableExplicitGC -Djava.awt.headless=true -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false" \
/home/jerry/racoon/nongfu.ngx.log.analyser/modules/LogProcess/out/libs/LogProcess-1.0-SNAPSHOT-all.jar /home/hadoop/properties/hdfs.properties com.sznongfu.cn.offline.statistic.UserTrajectoryStatistic




################################ Test changes
Streaming:

spark-submit --class com.sznongfu.cn.center.StartCenter --master spark://$(hostname):7077 --executor-memory 1500M --total-executor-cores 2 --driver-java-options "-XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled -XX:+ParallelRefProcEnabled -XX:+CMSClassUnloadingEnabled -XX:+HeapDumpOnOutOfMemoryError -XX:MaxTenuringThreshold=10 -XX:SurvivorRatio=8" --conf "spark.executor.extraJavaOptions=-XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled -XX:+ParallelRefProcEnabled -XX:+CMSClassUnloadingEnabled -XX:+HeapDumpOnOutOfMemoryError -XX:MaxTenuringThreshold=10 -XX:SurvivorRatio=8" /home/jerry/racoon/nongfu.ngx.log.analyser/modules/LogProcess/out/libs/LogProcess-1.0-SNAPSHOT-all.jar /home/hadoop/properties/hdfs.properties com.sznongfu.cn.online.statistic.NginxLogStatistic

Offline:

spark-submit --class com.sznongfu.cn.center.StartCenter --master spark://$(hostname):7077 --executor-memory 1500M --total-executor-cores 2 --driver-java-options "-XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled -XX:+ParallelRefProcEnabled -XX:+CMSClassUnloadingEnabled -XX:+HeapDumpOnOutOfMemoryError -XX:MaxTenuringThreshold=10 -XX:SurvivorRatio=8" --conf "spark.executor.extraJavaOptions=-XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled -XX:+ParallelRefProcEnabled -XX:+CMSClassUnloadingEnabled -XX:+HeapDumpOnOutOfMemoryError -XX:MaxTenuringThreshold=10 -XX:SurvivorRatio=8" /home/jerry/racoon/nongfu.ngx.log.analyser/modules/LogProcess/out/libs/LogProcess-1.0-SNAPSHOT-all.jar /home/hadoop/properties/hdfs.properties com.sznongfu.cn.offline.statistic.UserTrajectoryStatistic
 

2. G1 GC

/* backup -- not working:
spark-submit    --class com.sznongfu.cn.center.StartCenter  --name  sznongfuzhanggui   --master spark://hadoop:7077     --executor-memory 1G     --total-executor-cores 5   --conf "spark.executor.extraJavaOptions=-XX:+UseG1GC -XX:+PrintFlagsFinal -XX:+PrintReferenceGC -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintAdaptiveSizePolicy -XX:+UnlockDiagnosticVMOptions -XX:+G1SummarizeConcMark -XX:InitiatingHeapOccupancyPercent=35 -XX:ConcGCThread=20"    /home/jerry/racoon/nongfu.ngx.log.analyser/modules/LogProcess/out/libs/LogProcess-1.0-SNAPSHOT-all.jar   /home/hadoop/properties/hdfs.properties   com.sznongfu.cn.online.statistic.NginxLogStatistic
not working */

The likely culprit: -XX:ConcGCThread is not a valid HotSpot flag (the correct spelling is -XX:ConcGCThreads), and an unrecognized -XX option makes the JVM refuse to start.

/* working */

spark-submit    --class com.sznongfu.cn.center.StartCenter  --name  sznongfuzhanggui   --master spark://hadoop:7077     --executor-memory 1G     --total-executor-cores 5   --conf "spark.executor.extraJavaOptions=-XX:+UseG1GC -XX:+PrintFlagsFinal -XX:+PrintReferenceGC -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintAdaptiveSizePolicy -XX:+UnlockDiagnosticVMOptions -XX:+G1SummarizeConcMark"    /home/jerry/racoon/nongfu.ngx.log.analyser/modules/LogProcess/out/libs/LogProcess-1.0-SNAPSHOT-all.jar   /home/hadoop/properties/hdfs.properties   com.sznongfu.cn.online.statistic.NginxLogStatistic

/* working */


Intel's recommended JVM practices (for a large-heap deployment):
-XX:+UseG1GC -XX:+PrintFlagsFinal -XX:+PrintReferenceGC -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintAdaptiveSizePolicy -XX:+UnlockDiagnosticVMOptions -XX:+G1SummarizeConcMark -Xms88g -Xmx88g -XX:InitiatingHeapOccupancyPercent=35 -XX:ConcGCThreads=20


Kafka's own GC settings (from a running broker's command line, for comparison):
00:05:54 /home/sznongfu/opt/jdk-1.8.0/bin/java -Xmx512M -server -XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 -XX:+DisableExplicitGC -Djava.awt.headless=true -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dkafka.logs.dir=/home/sznongfu/opt/kafka_2.11-0.10.0.0/bin/../logs -Dlog4j.configuration=file:/home/sznongfu/opt/kafka_2.11-0.10.0.0/bin/../config/tools-log4j.properties -cp

 
spark-submit    --class com.sznongfu.cn.center.StartCenter  --name  sznongfuzhanggui   --master spark://hadoop:7077  --executor-memory 2G    --total-executor-cores 2       --conf "spark.executor.extraJavaOptions=-XX:+PrintGCDetails -XX:+PrintGCTimeStamps"    /home/jerry/racoon/nongfu.ngx.log.analyser/modules/LogProcess/out/libs/LogProcess-1.0-SNAPSHOT-all.jar   /home/hadoop/properties/hdfs.properties    com.sznongfu.cn.offline.statistic.UserTrajectoryStatistic

3. Automation scripts

1. crontab -e

0 3 * * *  /start.sh   # daily at 03:00

2. Shell

On merchant-benchmarkA, go into opt/nginx-1.9/logs/.

1. Logs are kept in merchant-master-logs/;
temporary logs go into api-logs/.

2. Check the log dates and copy over whichever files are needed:
copy the api-related logs from merchant-master-logs/ into ./api-logs/ (see the sketch after this list).


3. Log file naming

Today's logs end in .log

Logs from yesterday 04:00 through today 04:00 end in .log.1

Logs from before yesterday 04:00 end in .log.2.gz

4. Transfer
Pull the logs to the local machine with scp.

5. Fetch today's data:
scp  [email protected]:/home/sznongfu/opt/nginx-1.9/logs/merchant-master-logs/*.api.log.1  .
scp  [email protected]:/home/sznongfu/opt/nginx-1.9/logs/merchant-master-logs/*.api.log  .

6. Fetch yesterday's data:

scp  [email protected]:/home/sznongfu/opt/nginx-1.9/logs/merchant-master-logs/*.api.log.1  .
scp  [email protected]:/home/sznongfu/opt/nginx-1.9/logs/merchant-master-logs/*.api.log.2.gz  .
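A minimal sketch tying the staging (steps 1-2) and the transfer together, assuming the paths above:

# on merchant-benchmarkA: stage the api-related logs into api-logs/
cd /home/sznongfu/opt/nginx-1.9/logs
cp merchant-master-logs/*.api.log* ./api-logs/

# on the analysis host: pull whatever was staged
scp [email protected]:/home/sznongfu/opt/nginx-1.9/logs/api-logs/*.api.log* .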
 


online:
function run_online_spark_submit()
{
    # submit the streaming job in the background, logging to ${la_log}
    ${spark_bin_dir}/spark-submit \
        --class com.sznongfu.cn.center.StartCenter \
        --master spark://$(hostname):7077 \
        --executor-memory 1G \
        --executor-cores  2   \
        --total-executor-cores 2 \
        --driver-class-path /home/sznongfu/opt/spark-2.2.0-bin-hadoop2.7/jars/ \
        ${la_data_jar} ${la_data_conf} ${online_class_name} >> ${la_log} 2>&1 &
    # find the child spark-submit's PID: $$ is this script's PID, which appears
    # in the PPID column of the backgrounded child in ps -ef
    local class_ppid=$$
    local class_pid=$(ps -ef | grep -w ${class_ppid} | grep ${online_class_name} | grep -v grep | awk '{print $2}')
    # record the class name and PID as JSON for later stop/monitor scripts
    echo "{\"class\":\"$online_class_name\", \"pid\":\"${class_pid}\"}" > ${la_data_pid_file}
}
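A simpler alternative for grabbing that PID, since the job was just backgrounded from this same shell, is bash's $!; a sketch of the function's tail:

    # immediately after the backgrounded spark-submit:
    local class_pid=$!    # $! is the PID of the most recently backgrounded command
    echo "{\"class\":\"$online_class_name\", \"pid\":\"${class_pid}\"}" > ${la_data_pid_file}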

offline:

function run_offline_spark_submit()
{
    # submit the batch job in the background, logging to ${la_offline_log}
    ${spark_bin_dir}/spark-submit \
        --class com.sznongfu.cn.center.StartCenter \
        --master spark://$(hostname):7077 \
        --executor-memory 1G \
        --executor-cores  2   \
        --total-executor-cores 2 \
        --driver-class-path /home/sznongfu/opt/spark-2.2.0-bin-hadoop2.7/jars/ \
        ${la_data_jar} ${la_data_conf} ${offline_class_name} >> ${la_offline_log} 2>&1 &
}
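For completeness, a minimal sketch of the /start.sh invoked by the cron entry above; the two sourced file names are hypothetical stand-ins for wherever spark_bin_dir, la_data_jar, la_data_conf, the class names, and the log paths are actually defined:

#!/usr/bin/env bash
# hypothetical cron wrapper; both sourced paths are assumptions
source /home/sznongfu/opt/scripts/env.sh        # defines spark_bin_dir, la_data_jar, la_data_conf, *_class_name, log paths
source /home/sznongfu/opt/scripts/functions.sh  # defines run_online_spark_submit / run_offline_spark_submit

run_offline_spark_submit   # daily 03:00 batch run; the streaming job is typically started once, not from cron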


spark-submit fails to start

1. WARN TaskSchedulerImpl: Initial job has not accepted any resources; check your cluster UI to ensure that workers are registered and have sufficient memory

The cluster's currently free resources cannot satisfy what the application requested.
Resources come in two kinds: cores and RAM.
Cores are the executor slots available for running tasks.
RAM is the free memory each worker needs in order to run your application.
Fixes:
Do not request more than the free, available resources.
Shut down applications that have already finished their work (they hold on to cores and memory until they exit).
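Free cores and memory are visible on the master web UI (port 8080); the standalone master also serves the same status as JSON, which is handy for scripting. A sketch (the /json/ path is the standalone master's status endpoint; adjust host and port to your deployment):

# cluster status: workers, cores/memory used vs. total, running applications
curl -s http://$(hostname):8080/json/ | python -m json.tool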


2. Workers dying or appearing hung

Sometimes a worker disappears from the web UI or shows as DEAD, and the tasks running on that node fail with assorted "lost worker" errors. The cause is much the same as above: the worker daemon holds a large amount of UI state in memory, and long GC pauses make it miss its heartbeats to the master.

Fix:

Increase the daemon memory (SPARK_DAEMON_MEMORY sizes the standalone master and worker daemons themselves); in spark-env.sh on the worker nodes set:

export SPARK_DAEMON_MEMORY=2g   # adjust to your situation

For hung workers, also shrink the driver and executor history the worker keeps in memory:

spark.worker.ui.retainedExecutors 200   # both default to 1000
spark.worker.ui.retainedDrivers 200
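These are worker-side properties; one way to apply them is through SPARK_WORKER_OPTS in spark-env.sh on each worker, using the standard -D property syntax:

export SPARK_WORKER_OPTS="-Dspark.worker.ui.retainedExecutors=200 -Dspark.worker.ui.retainedDrivers=200"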

Source: blog.csdn.net/dymkkj/article/details/81189740