# __init__.py — project bootstrap module (environment setup, user checks, colours)
  1. #!/usr/bin/env /usr/bin/python3
  2. # -*- coding:utf-8 -*-
import getpass
import os
import socket
import sys
import time

import findspark
  8. def cow_says():
  9. os.system(f'source {PROJECT_ROOT_PATH}/bin/common/functions.sh')
  10. # HADOOP_CONF_DIR:spark-submit 启动 YARN 校验需要;DataX JVM 不读 classpath conf,HA 由 ini [hadoop_config] 节显式注入
  11. os.environ['HADOOP_CONF_DIR'] = '/etc/hadoop/conf'
  12. # SPARK_CONF_DIR:pip pyspark 默认指向自身空 conf/,显式指到集群配置才能加载 hive-site.xml,否则 enableHiveSupport 回落 in-memory metastore
  13. os.environ.setdefault('SPARK_CONF_DIR', '/etc/spark/conf')
  14. # os.environ['HIVE_CONF_DIR'] = '/etc/hive/conf'
  15. # os.environ['JAVA_HOME'] = '/usr/local/java'
  16. os.environ["PYSPARK_DRIVER_PYTHON"] = "/usr/bin/python3"
  17. os.environ["PYSPARK_PYTHON"] = "/usr/bin/python3"
  18. os.environ['PYTHONUNBUFFERED'] = 'x'
  19. PROJECT_ROOT_PATH = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
  20. PROJECT_NAME = os.path.basename(PROJECT_ROOT_PATH)
  21. sys.path.append(PROJECT_ROOT_PATH)
  22. # 公用的Spark UDF文件
  23. COMMON_SPARK_UDF_FILE = 'dw_base/udf/common/spark_common_udf.py'
  24. BANNED_USER = 'root'
  25. RELEASE_USER = 'alvis'
  26. USER = os.environ['USER']
  27. HOME = os.environ['HOME']
  28. if USER == BANNED_USER and HOME.startswith('/home'):
  29. USER = os.path.basename(HOME)
  30. HOST = socket.gethostname()
  31. RELEASE_ROOT_DIR = '/home/alvis/release'
  32. if not PROJECT_ROOT_PATH.startswith(RELEASE_ROOT_DIR) or USER != RELEASE_USER:
  33. DO_RESET: str = '\033[0m'
  34. CHG_BOLD: str = '\033[1m'
  35. NORM_RED: str = '\033[0;31m'
  36. NORM_GRN: str = '\033[0;32m'
  37. NORM_YEL: str = '\033[0;33m'
  38. NORM_BLU: str = '\033[0;34m'
  39. NORM_MGT: str = '\033[0;35m'
  40. NORM_CYN: str = '\033[0;36m'
  41. NORM_WHT: str = '\033[0;37m'
  42. BOLD_RED: str = '\033[1;31m'
  43. BOLD_GRN: str = '\033[1;32m'
  44. BOLD_YEL: str = '\033[1;33m'
  45. BOLD_BLU: str = '\033[1;34m'
  46. BOLD_MGT: str = '\033[1;35m'
  47. BOLD_CYN: str = '\033[1;36m'
  48. BOLD_WHT: str = '\033[1;37m'
  49. BGRD_RED: str = '\033[41m'
  50. BGRD_GRN: str = '\033[42m'
  51. BGRD_YEL: str = '\033[43m'
  52. BGRD_BLU: str = '\033[44m'
  53. BGRD_MGT: str = '\033[45m'
  54. BGRD_CYN: str = '\033[46m'
  55. BGRD_WHT: str = '\033[47m'
  56. else:
  57. DO_RESET: str = ''
  58. CHG_BOLD: str = ''
  59. NORM_RED: str = ''
  60. NORM_GRN: str = ''
  61. NORM_YEL: str = ''
  62. NORM_BLU: str = ''
  63. NORM_MGT: str = ''
  64. NORM_CYN: str = ''
  65. NORM_WHT: str = ''
  66. BOLD_RED: str = ''
  67. BOLD_GRN: str = ''
  68. BOLD_YEL: str = ''
  69. BOLD_BLU: str = ''
  70. BOLD_MGT: str = ''
  71. BOLD_CYN: str = ''
  72. BOLD_WHT: str = ''
  73. BGRD_RED: str = ''
  74. BGRD_GRN: str = ''
  75. BGRD_YEL: str = ''
  76. BGRD_BLU: str = ''
  77. BGRD_MGT: str = ''
  78. BGRD_CYN: str = ''
  79. BGRD_WHT: str = ''
  80. IS_RUN_BY_RELEASE_USER = False
  81. IS_RUN_BY_NORMAL_USER = False
  82. if USER == RELEASE_USER:
  83. LOG_ROOT_DIR = "/opt/data/log"
  84. IS_RUN_BY_RELEASE_USER = True
  85. elif USER == BANNED_USER:
  86. ERROR_CODE = 18
  87. print(f'{NORM_MGT}{time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())} '
  88. f'{NORM_RED}Project {NORM_GRN}{PROJECT_NAME} '
  89. f'{NORM_RED}is running by banned user {NORM_GRN}{BANNED_USER}'
  90. f'{NORM_RED}, exit with error code {NORM_GRN}{ERROR_CODE}'
  91. f'{DO_RESET}')
  92. exit(ERROR_CODE)
  93. else:
  94. IS_RUN_BY_NORMAL_USER = True
  95. LOG_ROOT_DIR = f'{HOME}/data/log'
  96. cow_says()
  97. print(f'{NORM_CYN}{time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())} '
  98. f'{NORM_MGT}Project {NORM_GRN}{PROJECT_NAME} '
  99. f'{NORM_MGT}is running in normal user {NORM_GRN}{USER}')
  100. if PROJECT_ROOT_PATH.startswith(f'{RELEASE_ROOT_DIR}/{PROJECT_NAME}'):
  101. IS_RUN_IN_RELEASE_DIR = True
  102. print(f'{NORM_CYN}{time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())} '
  103. f'{NORM_MGT}Project {NORM_GRN}{PROJECT_NAME} '
  104. f'{NORM_MGT}is running in release dir {NORM_GRN}{RELEASE_ROOT_DIR}/{PROJECT_NAME}')
  105. else:
  106. IS_RUN_IN_RELEASE_DIR = False
  107. print(f'{NORM_CYN}{time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())} '
  108. f'{NORM_MGT}Project {NORM_GRN}{PROJECT_NAME} '
  109. f'{NORM_MGT}is running in normal user dir {NORM_GRN}{PROJECT_ROOT_PATH}')
  110. if not IS_RUN_IN_RELEASE_DIR or USER != RELEASE_USER:
  111. os.system(f'echo -en "{NORM_GRN}"')
  112. os.system(f'echo -en "{DO_RESET}"')
  113. findspark.init()