| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127 |
- #!/usr/bin/env /usr/bin/python3
- # -*- coding:utf-8 -*-
import getpass
import os
import socket
import sys
import time

import findspark
def cow_says():
    """Source the project's shared shell helpers in a throw-away bash process.

    Runs ``<PROJECT_ROOT_PATH>/bin/common/functions.sh`` through ``bash`` with
    stdout/stderr inherited, so anything the script prints while being
    sourced shows up on the console.  Functions and variables defined by the
    script live only in that child shell and do not persist afterwards.

    ``source`` is not a POSIX ``sh`` builtin, and ``os.system`` hands its
    command to ``/bin/sh``; where ``/bin/sh`` is dash the previous
    implementation failed with "source: not found".  Bash is therefore
    invoked explicitly, and the script path is passed as a positional
    argument so that spaces or shell metacharacters in the project path
    cannot break the command.

    The call is best-effort, exactly as before: the exit status is ignored
    and a missing ``bash`` binary does not abort the caller.
    """
    import subprocess  # local import keeps this block self-contained

    helper_script = f'{PROJECT_ROOT_PATH}/bin/common/functions.sh'
    try:
        # The word after the command string becomes $0; the script is $1.
        subprocess.call(['bash', '-c', 'source "$1"', 'bash', helper_script])
    except OSError:
        # bash could not be started; os.system never raised in this
        # situation either, so the step is skipped rather than made fatal.
        pass
# HADOOP_CONF_DIR / HIVE_CONF_DIR: required so that Spark on YARN and the
# Hive metastore lookup can find their configuration.
#   * Without HADOOP_CONF_DIR the SparkSubmitArguments start-up check fails.
#   * Without HIVE_CONF_DIR enableHiveSupport falls back to an in-memory
#     metastore and cannot see the real HMS databases and tables.
# The DataX JVM does not read conf from the classpath; HA settings are
# injected explicitly through the [hadoop_config] section of the ini file.
os.environ.update({
    'HADOOP_CONF_DIR': '/etc/hadoop/conf',
    'HIVE_CONF_DIR': '/etc/hive/conf',
    # Both PySpark interpreter settings point at the same python3 binary.
    'PYSPARK_DRIVER_PYTHON': '/usr/bin/python3',
    'PYSPARK_PYTHON': '/usr/bin/python3',
    'PYTHONUNBUFFERED': 'x',
})
# The following overrides are kept disabled:
# os.environ['JAVA_HOME'] = '/usr/local/java'
# os.environ['SPARK_HOME'] = '/usr/hdp/3.1.5.0-152/spark2'
# Fixed account names: the account that must never run the project and the
# account used for release runs.
BANNED_USER = 'root'
RELEASE_USER = 'alvis'

# Shared Spark UDF file.
COMMON_SPARK_UDF_FILE = 'dw_base/udf/common/spark_common_udf.py'

# The project root is the parent of the directory that contains this file;
# the project name is the last component of that path.
PROJECT_ROOT_PATH = os.path.abspath(
    os.path.dirname(os.path.dirname(__file__))
)
PROJECT_NAME = os.path.split(PROJECT_ROOT_PATH)[1]

# Make modules under the project root importable.
sys.path.append(PROJECT_ROOT_PATH)
# Account running the process.  $USER stays the primary source; when it is
# missing or empty (it is not exported in every cron/container environment)
# fall back to getpass.getuser() instead of dying with a KeyError.
USER = os.environ.get('USER') or getpass.getuser()
# Home directory of that account; when $HOME is missing or empty, use
# whatever os.path.expanduser('~') resolves to.
HOME = os.environ.get('HOME') or os.path.expanduser('~')
if USER == BANNED_USER and HOME.startswith('/home'):
    # Reported as the banned account while HOME still points below /home:
    # use the name of that home directory as the effective user.
    # NOTE(review): presumably covers sudo/su sessions that keep the
    # caller's HOME - confirm.
    USER = os.path.basename(HOME)
HOST = socket.gethostname()
# Directory below which release checkouts of the project live.
RELEASE_ROOT_DIR = '/home/alvis/release'
# ANSI escape sequences used to colour console output.  When the project
# runs from below RELEASE_ROOT_DIR *and* as RELEASE_USER every constant is
# the empty string, so the output carries no escape codes; in every other
# case the real sequences are used.
_PLAIN_OUTPUT = (
    PROJECT_ROOT_PATH.startswith(RELEASE_ROOT_DIR) and USER == RELEASE_USER
)

# Reset / bold switch.
DO_RESET: str = '' if _PLAIN_OUTPUT else '\033[0m'
CHG_BOLD: str = '' if _PLAIN_OUTPUT else '\033[1m'

# Normal-weight foreground colours.
NORM_RED: str = '' if _PLAIN_OUTPUT else '\033[0;31m'
NORM_GRN: str = '' if _PLAIN_OUTPUT else '\033[0;32m'
NORM_YEL: str = '' if _PLAIN_OUTPUT else '\033[0;33m'
NORM_BLU: str = '' if _PLAIN_OUTPUT else '\033[0;34m'
NORM_MGT: str = '' if _PLAIN_OUTPUT else '\033[0;35m'
NORM_CYN: str = '' if _PLAIN_OUTPUT else '\033[0;36m'
NORM_WHT: str = '' if _PLAIN_OUTPUT else '\033[0;37m'

# Bold foreground colours.
BOLD_RED: str = '' if _PLAIN_OUTPUT else '\033[1;31m'
BOLD_GRN: str = '' if _PLAIN_OUTPUT else '\033[1;32m'
BOLD_YEL: str = '' if _PLAIN_OUTPUT else '\033[1;33m'
BOLD_BLU: str = '' if _PLAIN_OUTPUT else '\033[1;34m'
BOLD_MGT: str = '' if _PLAIN_OUTPUT else '\033[1;35m'
BOLD_CYN: str = '' if _PLAIN_OUTPUT else '\033[1;36m'
BOLD_WHT: str = '' if _PLAIN_OUTPUT else '\033[1;37m'

# Background colours.
BGRD_RED: str = '' if _PLAIN_OUTPUT else '\033[41m'
BGRD_GRN: str = '' if _PLAIN_OUTPUT else '\033[42m'
BGRD_YEL: str = '' if _PLAIN_OUTPUT else '\033[43m'
BGRD_BLU: str = '' if _PLAIN_OUTPUT else '\033[44m'
BGRD_MGT: str = '' if _PLAIN_OUTPUT else '\033[45m'
BGRD_CYN: str = '' if _PLAIN_OUTPUT else '\033[46m'
BGRD_WHT: str = '' if _PLAIN_OUTPUT else '\033[47m'
# Classify the invoking account and pick the log root accordingly:
#   * RELEASE_USER -> logs under /opt/data/log
#   * BANNED_USER  -> print an error and terminate with exit status 18
#   * anyone else  -> logs under <HOME>/data/log, then the shell helpers are
#                     sourced via cow_says() and a notice is printed
IS_RUN_BY_RELEASE_USER = False
IS_RUN_BY_NORMAL_USER = False
if USER == RELEASE_USER:
    LOG_ROOT_DIR = "/opt/data/log"
    IS_RUN_BY_RELEASE_USER = True
elif USER == BANNED_USER:
    ERROR_CODE = 18
    print(f'{NORM_MGT}{time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())} '
          f'{NORM_RED}Project {NORM_GRN}{PROJECT_NAME} '
          f'{NORM_RED}is running by banned user {NORM_GRN}{BANNED_USER}'
          f'{NORM_RED}, exit with error code {NORM_GRN}{ERROR_CODE}'
          f'{DO_RESET}')
    # sys.exit instead of the bare exit(): the latter is injected by the
    # `site` module for interactive use and is not guaranteed to exist
    # (e.g. under `python -S`).  Both raise SystemExit with the same code.
    sys.exit(ERROR_CODE)
else:
    IS_RUN_BY_NORMAL_USER = True
    LOG_ROOT_DIR = f'{HOME}/data/log'
    cow_says()
    print(f'{NORM_CYN}{time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())} '
          f'{NORM_MGT}Project {NORM_GRN}{PROJECT_NAME} '
          f'{NORM_MGT}is running in normal user {NORM_GRN}{USER}')
# True when this checkout lives below <RELEASE_ROOT_DIR>/<PROJECT_NAME>;
# either way, report on the console where the project is running from.
IS_RUN_IN_RELEASE_DIR = PROJECT_ROOT_PATH.startswith(
    f'{RELEASE_ROOT_DIR}/{PROJECT_NAME}'
)
if IS_RUN_IN_RELEASE_DIR:
    print(
        f'{NORM_CYN}{time.strftime("%Y-%m-%d %H:%M:%S")} '
        f'{NORM_MGT}Project {NORM_GRN}{PROJECT_NAME} '
        f'{NORM_MGT}is running in release dir '
        f'{NORM_GRN}{RELEASE_ROOT_DIR}/{PROJECT_NAME}'
    )
else:
    print(
        f'{NORM_CYN}{time.strftime("%Y-%m-%d %H:%M:%S")} '
        f'{NORM_MGT}Project {NORM_GRN}{PROJECT_NAME} '
        f'{NORM_MGT}is running in normal user dir '
        f'{NORM_GRN}{PROJECT_ROOT_PATH}'
    )
# Outside of a release run (release directory AND release user) emit the
# green colour code followed by the reset code, leaving the terminal in its
# default colour after the notices printed so far.
#
# The codes are written straight to stdout instead of spawning
# `echo -en ...` through os.system: `echo -en` is not portable (a POSIX sh
# such as dash prints "-en" literally), it costs one shell process per
# call, and the child's output bypassed Python's own stdout buffer.
if not IS_RUN_IN_RELEASE_DIR or USER != RELEASE_USER:
    sys.stdout.write(NORM_GRN)
    sys.stdout.write(DO_RESET)
    sys.stdout.flush()

# Let findspark locate the Spark installation so that pyspark can be
# imported afterwards (behaviour per the findspark documentation).
findspark.init()
|