1 vecka sedan · 77c821b2f4
--- a/dw_base/spark/spark_sql.py
+++ b/dw_base/spark/spark_sql.py
@@ -35,29 +35,7 @@ def _load_spark_conf_file(path: str) -> Dict[str, str]:
 
				 
			
 
				 
			
 
				 class SparkSQL(object):
			
 
				-    """
			
 
				-    封装执行 Spark 相关操作的类, 相关参数说明:
			
 
				-
			
 
				-    In Spark 2.0+ version
			
 
				-
			
 
				-    use spark session variable to set number of executors dynamically (from within program)
			
 
				-
			
 
				-    spark.conf.set("spark.executor.instances', 4)
			
 
				-    spark.conf.set("spark.executor.cores', 4)
			
 
				-
			
 
				-    In above case maximum 16 tasks will be executed at any given time.
			
 
				-    other option is dynamic allocation of executors as below -
			
 
				-
			
 
				-    spark.conf.set("spark.dynamicAllocation.enabled', "true')
			
 
				-    spark.conf.set("spark.executor.cores', 4)
			
 
				-    spark.conf.set("spark.dynamicAllocation.minExecutors',"1')
			
 
				-    spark.conf.set("spark.dynamicAllocation.maxExecutors',"5')
			
 
				-
			
 
				-    spark.yarn.executor.memoryOverhead：default is executorMemory * 0.07, with minimum of 384
			
 
				-    spark.yarn.driver.memoryOverhead：default is driverMemory * 0.07, with minimum of 384
			
 
				-    spark.yarn.am.memoryOverhead：default is AM memory * 0.07, with minimum of 384
			
 
				-
			
 
				-    """
			
 
				+    """封装 Spark 会话与 SQL 执行；参数三级覆盖见 conf/spark-defaults.conf + spark-tuning.conf 与 kb/00 §4.2。"""
			
 
				     REGISTERED_UDF_FILES = []
			
 
				     ADDED_RESOURCE_FILES = []
			
 
				     REGISTERED_UDF = {}
			
--- a/dw_base/utils/datetime_utils.py
+++ b/dw_base/utils/datetime_utils.py
@@ -83,30 +83,8 @@ def get_date_range(some_date: str) -> [str]:
 
				 
			
 
				 
			
 
				 def parse_datetime(date_time: str, original_format: str = None) -> datetime:
			
 
				-    """
			
 
				-    先识别日期，再按照指定格式输出
			
 
				-    %y 两位数的年份表示（00-99）
			
 
				-    %Y 四位数的年份表示（0000-9999）
			
 
				-    %m 月份（01-12）
			
 
				-    %d 月内中的一天（0-31）
			
 
				-    %H 24小时制小时数（0-23）
			
 
				-    %I 12小时制小时数（01-12）
			
 
				-    %M 分钟数（00-59）
			
 
				-    %S 秒（00-59）
			
 
				-    %a 本地简化星期名称
			
 
				-    %A 本地完整星期名称
			
 
				-    %b 本地简化的月份名称
			
 
				-    %B 本地完整的月份名称
			
 
				-    %c 本地相应的日期表示和时间表示
			
 
				-    %j 年内的一天（001-366）
			
 
				-    %p 本地A.M.或P.M.的等价符
			
 
				-    %U 一年中的星期数（00-53）星期天为星期的开始
			
 
				-    %w 星期（0-6），星期天为星期的开始
			
 
				-    %W 一年中的星期数（00-53）星期一为星期的开始
			
 
				-    %x 本地相应的日期表示
			
 
				-    %X 本地相应的时间表示
			
 
				-    %Z 当前时区的名称
			
 
				-    %% %号本身
			
 
				+    """解析日期字符串为 datetime：传 original_format 按它解析，否则用 dateutil 自动识别。
			
 
				+
			
 
				     Args:
			
 
				         date_time: 日期
			
 
				         original_format: 原日期格式
			
--- a/dw_base/utils/sql_utils.py
+++ b/dw_base/utils/sql_utils.py
@@ -58,14 +58,6 @@ def get_sql_list_from_file(sql_file: str, trim_comment: bool = False) -> List[st
 
				             sql_list.append(sql_buffer + line.strip().strip(';'))
			
 
				             sql_buffer = ''
			
 
				             continue
			
 
				-        # if line.strip().__contains__(';'):
			
 
				-        #     # 新行含有分号（比较复杂的逻辑，如 like '%abc;def%'），如果分号左边的单引号个数是奇数个，应认为分号是作为参数的（没有实现，先一刀切认为是语句结尾吧）
			
 
				-        #     parts = line.split(';')
			
 
				-        #     sql_list.append(sql_buffer + parts[0])
			
 
				-        #     for index in range(1, len(parts) - 1):
			
 
				-        #         sql_list.append(parts[index])
			
 
				-        #     sql_buffer = parts[-1]
			
 
				-        #     continue
			
 
				         sql_buffer += line
			
 
				     if sql_buffer != '':
			
 
				         sql_list.append(sql_buffer)