# __init__.py
#!/usr/bin/env /usr/bin/python3
# -*- coding:utf-8 -*-
import getpass
import os
import socket
import sys
import time

import findspark
  8. def cow_says():
  9. os.system(f'source {PROJECT_ROOT_PATH}/bin/common/functions.sh')
  10. # HADOOP_CONF_DIR / HIVE_CONF_DIR:Spark on YARN + Hive metastore 定位需要
  11. # 缺 HADOOP_CONF_DIR → SparkSubmitArguments 启动校验失败
  12. # 缺 HIVE_CONF_DIR → enableHiveSupport 回落 in-memory metastore,看不到 HMS 真实库表
  13. # DataX JVM 不读 classpath 中的 conf,HA 由 ini [hadoop_config] 节显式注入
  14. os.environ['HADOOP_CONF_DIR'] = '/etc/hadoop/conf'
  15. os.environ['HIVE_CONF_DIR'] = '/etc/hive/conf'
  16. # os.environ['JAVA_HOME'] = '/usr/local/java'
  17. os.environ["PYSPARK_DRIVER_PYTHON"] = "/usr/bin/python3"
  18. os.environ["PYSPARK_PYTHON"] = "/usr/bin/python3"
  19. # os.environ['SPARK_HOME'] = '/usr/hdp/3.1.5.0-152/spark2'
  20. os.environ['PYTHONUNBUFFERED'] = 'x'
  21. PROJECT_ROOT_PATH = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
  22. PROJECT_NAME = os.path.basename(PROJECT_ROOT_PATH)
  23. sys.path.append(PROJECT_ROOT_PATH)
  24. # 公用的Spark UDF文件
  25. COMMON_SPARK_UDF_FILE = 'dw_base/udf/common/spark_common_udf.py'
  26. BANNED_USER = 'root'
  27. RELEASE_USER = 'alvis'
  28. USER = os.environ['USER']
  29. HOME = os.environ['HOME']
  30. if USER == BANNED_USER and HOME.startswith('/home'):
  31. USER = os.path.basename(HOME)
  32. HOST = socket.gethostname()
  33. RELEASE_ROOT_DIR = '/home/alvis/release'
  34. if not PROJECT_ROOT_PATH.startswith(RELEASE_ROOT_DIR) or USER != RELEASE_USER:
  35. DO_RESET: str = '\033[0m'
  36. CHG_BOLD: str = '\033[1m'
  37. NORM_RED: str = '\033[0;31m'
  38. NORM_GRN: str = '\033[0;32m'
  39. NORM_YEL: str = '\033[0;33m'
  40. NORM_BLU: str = '\033[0;34m'
  41. NORM_MGT: str = '\033[0;35m'
  42. NORM_CYN: str = '\033[0;36m'
  43. NORM_WHT: str = '\033[0;37m'
  44. BOLD_RED: str = '\033[1;31m'
  45. BOLD_GRN: str = '\033[1;32m'
  46. BOLD_YEL: str = '\033[1;33m'
  47. BOLD_BLU: str = '\033[1;34m'
  48. BOLD_MGT: str = '\033[1;35m'
  49. BOLD_CYN: str = '\033[1;36m'
  50. BOLD_WHT: str = '\033[1;37m'
  51. BGRD_RED: str = '\033[41m'
  52. BGRD_GRN: str = '\033[42m'
  53. BGRD_YEL: str = '\033[43m'
  54. BGRD_BLU: str = '\033[44m'
  55. BGRD_MGT: str = '\033[45m'
  56. BGRD_CYN: str = '\033[46m'
  57. BGRD_WHT: str = '\033[47m'
  58. else:
  59. DO_RESET: str = ''
  60. CHG_BOLD: str = ''
  61. NORM_RED: str = ''
  62. NORM_GRN: str = ''
  63. NORM_YEL: str = ''
  64. NORM_BLU: str = ''
  65. NORM_MGT: str = ''
  66. NORM_CYN: str = ''
  67. NORM_WHT: str = ''
  68. BOLD_RED: str = ''
  69. BOLD_GRN: str = ''
  70. BOLD_YEL: str = ''
  71. BOLD_BLU: str = ''
  72. BOLD_MGT: str = ''
  73. BOLD_CYN: str = ''
  74. BOLD_WHT: str = ''
  75. BGRD_RED: str = ''
  76. BGRD_GRN: str = ''
  77. BGRD_YEL: str = ''
  78. BGRD_BLU: str = ''
  79. BGRD_MGT: str = ''
  80. BGRD_CYN: str = ''
  81. BGRD_WHT: str = ''
  82. IS_RUN_BY_RELEASE_USER = False
  83. IS_RUN_BY_NORMAL_USER = False
  84. if USER == RELEASE_USER:
  85. LOG_ROOT_DIR = "/opt/data/log"
  86. IS_RUN_BY_RELEASE_USER = True
  87. elif USER == BANNED_USER:
  88. ERROR_CODE = 18
  89. print(f'{NORM_MGT}{time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())} '
  90. f'{NORM_RED}Project {NORM_GRN}{PROJECT_NAME} '
  91. f'{NORM_RED}is running by banned user {NORM_GRN}{BANNED_USER}'
  92. f'{NORM_RED}, exit with error code {NORM_GRN}{ERROR_CODE}'
  93. f'{DO_RESET}')
  94. exit(ERROR_CODE)
  95. else:
  96. IS_RUN_BY_NORMAL_USER = True
  97. LOG_ROOT_DIR = f'{HOME}/data/log'
  98. cow_says()
  99. print(f'{NORM_CYN}{time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())} '
  100. f'{NORM_MGT}Project {NORM_GRN}{PROJECT_NAME} '
  101. f'{NORM_MGT}is running in normal user {NORM_GRN}{USER}')
  102. if PROJECT_ROOT_PATH.startswith(f'{RELEASE_ROOT_DIR}/{PROJECT_NAME}'):
  103. IS_RUN_IN_RELEASE_DIR = True
  104. print(f'{NORM_CYN}{time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())} '
  105. f'{NORM_MGT}Project {NORM_GRN}{PROJECT_NAME} '
  106. f'{NORM_MGT}is running in release dir {NORM_GRN}{RELEASE_ROOT_DIR}/{PROJECT_NAME}')
  107. else:
  108. IS_RUN_IN_RELEASE_DIR = False
  109. print(f'{NORM_CYN}{time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())} '
  110. f'{NORM_MGT}Project {NORM_GRN}{PROJECT_NAME} '
  111. f'{NORM_MGT}is running in normal user dir {NORM_GRN}{PROJECT_ROOT_PATH}')
  112. if not IS_RUN_IN_RELEASE_DIR or USER != RELEASE_USER:
  113. os.system(f'echo -en "{NORM_GRN}"')
  114. os.system(f'echo -en "{DO_RESET}"')
  115. findspark.init()