|
@@ -35,29 +35,7 @@ def _load_spark_conf_file(path: str) -> Dict[str, str]:
|
|
|
|
|
|
|
|
|
|
|
|
|
class SparkSQL(object):
|
|
class SparkSQL(object):
|
|
|
- """
|
|
|
|
|
- 封装执行 Spark 相关操作的类, 相关参数说明:
|
|
|
|
|
-
|
|
|
|
|
- In Spark 2.0+ version
|
|
|
|
|
-
|
|
|
|
|
- use spark session variable to set number of executors dynamically (from within program)
|
|
|
|
|
-
|
|
|
|
|
- spark.conf.set("spark.executor.instances", 4)
|
|
|
|
|
- spark.conf.set("spark.executor.cores", 4)
|
|
|
|
|
-
|
|
|
|
|
- In the above case, at most 16 tasks (4 executors x 4 cores) will be executed at any given time.
|
|
|
|
|
- The other option is dynamic allocation of executors, as shown below:
|
|
|
|
|
-
|
|
|
|
|
- spark.conf.set("spark.dynamicAllocation.enabled", "true")
|
|
|
|
|
- spark.conf.set("spark.executor.cores", 4)
|
|
|
|
|
- spark.conf.set("spark.dynamicAllocation.minExecutors", "1")
|
|
|
|
|
- spark.conf.set("spark.dynamicAllocation.maxExecutors", "5")
|
|
|
|
|
-
|
|
|
|
|
- spark.yarn.executor.memoryOverhead:default is executorMemory * 0.07, with minimum of 384
|
|
|
|
|
- spark.yarn.driver.memoryOverhead:default is driverMemory * 0.07, with minimum of 384
|
|
|
|
|
- spark.yarn.am.memoryOverhead:default is AM memory * 0.07, with minimum of 384
|
|
|
|
|
-
|
|
|
|
|
- """
|
|
|
|
|
|
|
+ """封装 Spark 会话与 SQL 执行;参数三级覆盖见 conf/spark-defaults.conf + spark-tuning.conf 与 kb/00 §4.2。"""
|
|
|
REGISTERED_UDF_FILES = []
|
|
REGISTERED_UDF_FILES = []
|
|
|
ADDED_RESOURCE_FILES = []
|
|
ADDED_RESOURCE_FILES = []
|
|
|
REGISTERED_UDF = {}
|
|
REGISTERED_UDF = {}
|