from pyspark.sql import SparkSession
# Settings to avoid a common error; see:
# https://blog.csdn.net/qq_35744460/article/details/83650603
'''
.config("spark.debug.maxToStringFields", "100") \
'''
# Another error: Exception in thread "refresh progress" java.lang.OutOfMemoryError: GC overhead limit exceeded
# Cause: the JVM was not allocated enough memory; increasing the relevant memory settings fixes it.
'''
.config('spark.executor.memory', '8g') \
.config('spark.driver.memory', '8g') \
.config('spark.driver.maxResultSize', '0') \
'''
spark = SparkSession.builder \
    .appName('My first app') \
    .config('spark.debug.maxToStringFields', '100') \
    .config('spark.executor.memory', '8g') \
    .config('spark.driver.memory', '8g') \
    .config('spark.driver.maxResultSize', '0') \
    .getOrCreate()
url = 'jdbc:mysql://localhost:3306/yb?characterEncoding=utf-8&autoReconnect=true'
properties = {'user': 'root', 'password': '123456'}
# Requires the MySQL JDBC driver jar to be on the Spark classpath.
df = spark.read.jdbc(url=url, table="js", properties=properties)
df.show()
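
# If the GC overhead error comes from reading a large table through a single
# JDBC connection, a partitioned read spreads the rows across executors.
# A minimal sketch, assuming "js" has a numeric "id" column; the bounds and
# partition count below are hypothetical:
df_part = spark.read.jdbc(
    url=url,
    table="js",
    column="id",          # hypothetical numeric partition column
    lowerBound=1,         # hypothetical minimum id
    upperBound=1000000,   # hypothetical maximum id
    numPartitions=8,
    properties=properties,
)
df_part.show()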
# Why the error happens:
# https://blog.csdn.net/a5685263/article/details/102265838
# Spark exceptions: OutOfMemory: GC overhead limit exceeded
# https://www.cnblogs.com/yanshw/p/12010729.html
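
# Note: depending on how this script is launched, 'spark.driver.memory' set
# through the builder may not take effect, because the driver JVM can already
# be running by that point (e.g. under spark-submit). Passing the memory
# settings on the command line is the safer route; a sketch, where the jar
# and script names are examples:
#
#   spark-submit --driver-memory 8g --executor-memory 8g \
#       --jars mysql-connector-java-8.0.30.jar this_script.py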