ent_interface_dingtalk_update.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242
  1. import sys
  2. import re
  3. import os
  4. import requests
  5. abspath = os.path.abspath(__file__)
  6. root_path = re.sub(r"tendata-warehouse.*", "tendata-warehouse", abspath)
  7. sys.path.append(root_path)
  8. from dw_base.utils.config_utils import parse_args
  9. from dw_base.spark.spark_sql import SparkSQL
  10. import json
  11. spark = SparkSQL(udf_files=['dw_base/spark/udf/contacts/ctc_common.py',
  12. 'dw_base/spark/udf/spark_id_generate_udf.py'],
  13. extra_spark_config={'spark.sql.crossJoin.enabled': True})
  14. def send_dingtalk_notification(msg):
  15. headers = {"Content-Type": "application/json"}
  16. data = {
  17. "msgtype": "text",
  18. "text": {"content": msg}
  19. }
  20. json_data = json.dumps(data)
  21. # 企业库数据产品线
  22. url = 'https://oapi.dingtalk.com/robot/send?access_token=c4086d8ba377fdade2dff869e71063733095bc718d3bafdfbe8be0966aa050d6'
  23. # 企业库管理群
  24. # url = 'https://oapi.dingtalk.com/robot/send?access_token=5183dfe1ecbe06261bcac7b45c1a6b5ae101fec67877d74120a6a95c88d1f917'
  25. # url = 'https://oapi.dingtalk.com/robot/send?access_token=c4086d8ba377fdade2dff869e71063733095bc718d3bafdfbe8be0966aa050d6'
  26. # 企业&联系人机器人测试群
  27. # url = 'https://oapi.dingtalk.com/robot/send?access_token=bee997dbf61e839a17de087830ffef6e864c3109fef62a956703bdfe043b0e10'
  28. response = requests.post(url=url, data=json_data, headers=headers)
  29. response.raise_for_status()
  30. def get_base_cnt(dt, trigger_type):
  31. sql = f'''
  32. SELECT count(DISTINCT user_id) as user_cnt, count(distinct trace_id) as trace_cnt
  33. FROM (SELECT user_id, trace_id
  34. FROM ctc_ods.ctc_shh_interface_log
  35. WHERE dt = '{dt}'
  36. AND trigger_type = '{trigger_type}'
  37. UNION ALL
  38. SELECT user_id, trace_id
  39. FROM ctc_ods.ctc_snv_interface_log
  40. WHERE dt = '{dt}'
  41. AND trigger_type = '{trigger_type}'
  42. UNION ALL
  43. SELECT user_id, trace_id
  44. FROM ctc_ods.ctc_google_interface_log
  45. WHERE dt = '{dt}'
  46. AND trigger_type = '{trigger_type}') t
  47. '''
  48. return spark.query(sql)[0].collect()[0]
  49. def get_web_cnt(dt, trigger_type):
  50. sql = f'''
  51. select count(1) as request_web_cnt,
  52. nvl(sum(if(get_json_object(ori_json, '$.result.data.website') is not null, 1, 0)), 0) as get_web_cnt
  53. from ent_raw.interface_base
  54. where topic = 'ent_tendata_interface'
  55. and dt = '{dt}'
  56. and get_json_object(ori_json, '$.source') = 'BING'
  57. and get_json_object(ori_json, '$.type') = '{trigger_type}'
  58. '''
  59. return spark.query(sql)[0].collect()[0]
  60. def get_auto_user_cnt(dt):
  61. sql = f'''
  62. SELECT
  63. count(DISTINCT get_json_object(ori_json, '$.params.userId')) AS request_user_cnt
  64. FROM ent_raw.interface_base
  65. WHERE topic = 'ent_tendata_interface'
  66. AND dt = '{dt}'
  67. AND get_json_object(ori_json, '$.source') = 'BING'
  68. AND get_json_object(ori_json, '$.type') = 'AUTO'
  69. '''
  70. return spark.query(sql)[0].collect()[0]['request_user_cnt']
  71. def get_auto_source_cnt(dt):
  72. sql = f'''
  73. SELECT *
  74. from (select count(distinct trace_id) as shh_cnt
  75. FROM ctc_ods.ctc_shh_interface_log
  76. WHERE dt = '{dt}'
  77. AND trigger_type = 'AUTO') shh
  78. join (SELECT count(distinct trace_id) as snv_cnt
  79. FROM ctc_ods.ctc_snv_interface_log
  80. WHERE dt = '{dt}'
  81. AND trigger_type = 'AUTO') snv
  82. join (SELECT count(distinct trace_id) as ggl_cnt
  83. FROM ctc_ods.ctc_google_interface_log
  84. WHERE dt = '{dt}'
  85. AND trigger_type = 'AUTO') ggl
  86. '''
  87. return spark.query(sql)[0].collect()[0]
  88. def get_res_cnt(dt, trigger_type):
  89. sql = f'''
  90. with init as (select ti,
  91. if(source like '%shh_%', 1, 0) as shh_flag,
  92. if(source like '%snovio%', 1, 0) as snv_flag,
  93. if(source like '%google%', 1, 0) as ggl_flag
  94. from ctc_mid.ctc_main_pre
  95. LATERAL VIEW explode(trace_id) exploded_table1 AS ti
  96. where dt = '{dt}'
  97. and array_contains(trigger_type, '{trigger_type}')),
  98. flag as (select ti
  99. , if(sum(shh_flag) > 0, 1, 0) as shh_get_flag
  100. , if(sum(snv_flag) > 0, 1, 0) as snv_get_flag
  101. , if(sum(ggl_flag) > 0, 1, 0) as ggl_get_flag
  102. from init
  103. group by ti)
  104. select nvl(sum(shh_get_flag),0) as shh_get_cnt
  105. , nvl(sum(snv_get_flag),0) as snv_get_cnt
  106. , nvl(sum(ggl_get_flag),0) as ggl_get_cnt
  107. , count(ti) as all_get_cnt
  108. from flag
  109. '''
  110. return spark.query(sql)[0].collect()[0]
  111. def get_ctc_cnt(dt, trigger_type):
  112. sql = f'''
  113. select nvl(sum(if(source like '%shh_%', 1, 0)), 0) as shh_ctc_cnt,
  114. nvl(sum(if(source like '%snovio%', 1, 0)), 0) as snv_ctc_cnt,
  115. nvl(sum(if(source like '%google%', 1, 0)), 0) as ggl_ctc_cnt
  116. from ctc_mid.ctc_main_pre
  117. where dt = '{dt}'
  118. and array_contains(trigger_type, '{trigger_type}')
  119. '''
  120. return spark.query(sql)[0].collect()[0]
  121. if __name__ == '__main__':
  122. CONFIG, _ = parse_args(sys.argv[1:])
  123. dts = CONFIG.get('dt').split(',')
  124. for dt in dts:
  125. format_dt = f'{dt[:4]}-{dt[4:6]}-{dt[6:]}'
  126. manual_base_cnt = get_base_cnt(dt, 'MANUAL')
  127. manual_web_cnt = get_web_cnt(dt, 'MANUAL')
  128. manual_res_cnt = get_res_cnt(dt, 'MANUAL')
  129. manual_ctc_cnt = get_ctc_cnt(dt, 'MANUAL')
  130. manual_user_cnt = manual_base_cnt['user_cnt']
  131. manual_trace_cnt = manual_base_cnt['trace_cnt']
  132. manual_trace_avg = manual_trace_cnt / manual_user_cnt if manual_user_cnt > 0 else 0
  133. manual_web_request_cnt = manual_web_cnt['request_web_cnt']
  134. manual_web_get_cnt = manual_web_cnt['get_web_cnt']
  135. manual_web_get_pct = 100 * manual_web_get_cnt / manual_web_request_cnt if manual_web_request_cnt > 0 else 0
  136. manual_shh_get_cnt = manual_res_cnt['shh_get_cnt']
  137. manual_shh_get_pct = 100 * manual_shh_get_cnt / manual_trace_cnt if manual_trace_cnt > 0 else 0
  138. manual_snv_get_cnt = manual_res_cnt['snv_get_cnt']
  139. manual_snv_get_pct = 100 * manual_snv_get_cnt / manual_trace_cnt if manual_trace_cnt > 0 else 0
  140. manual_ggl_get_cnt = manual_res_cnt['ggl_get_cnt']
  141. manual_ggl_get_pct = 100 * manual_ggl_get_cnt / manual_trace_cnt if manual_trace_cnt > 0 else 0
  142. manual_all_get_cnt = manual_res_cnt['all_get_cnt']
  143. manual_all_get_pct = 100 * manual_all_get_cnt / manual_trace_cnt if manual_trace_cnt > 0 else 0
  144. manual_ctc_shh_cnt = manual_ctc_cnt['shh_ctc_cnt']
  145. manual_ctc_snv_cnt = manual_ctc_cnt['snv_ctc_cnt']
  146. manual_ctc_ggl_cnt = manual_ctc_cnt['ggl_ctc_cnt']
  147. ############################################################
  148. auto_base_cnt = get_base_cnt(dt, 'AUTO')
  149. auto_web_cnt = get_web_cnt(dt, 'AUTO')
  150. auto_res_cnt = get_res_cnt(dt, 'AUTO')
  151. auto_ctc_cnt = get_ctc_cnt(dt, 'AUTO')
  152. auto_user_cnt = auto_base_cnt['user_cnt']
  153. auto_trace_cnt = auto_base_cnt['trace_cnt']
  154. auto_trace_avg = auto_trace_cnt / auto_user_cnt if auto_user_cnt > 0 else 0
  155. auto_web_request_cnt = auto_web_cnt['request_web_cnt']
  156. auto_web_get_cnt = auto_web_cnt['get_web_cnt']
  157. auto_web_get_pct = 100 * auto_web_get_cnt / auto_web_request_cnt if auto_web_request_cnt > 0 else 0
  158. auto_source_cnt = get_auto_source_cnt(dt)
  159. auto_request_shh_cnt = auto_source_cnt['shh_cnt']
  160. auto_request_snv_cnt = auto_source_cnt['snv_cnt']
  161. auto_request_ggl_cnt = auto_source_cnt['ggl_cnt']
  162. auto_shh_get_cnt = auto_res_cnt['shh_get_cnt']
  163. auto_shh_get_pct = 100 * auto_shh_get_cnt / auto_request_shh_cnt if auto_request_shh_cnt > 0 else 0
  164. auto_snv_get_cnt = auto_res_cnt['snv_get_cnt']
  165. auto_snv_get_pct = 100 * auto_snv_get_cnt / auto_request_snv_cnt if auto_request_snv_cnt > 0 else 0
  166. auto_ggl_get_cnt = auto_res_cnt['ggl_get_cnt']
  167. auto_ggl_get_pct = 100 * auto_ggl_get_cnt / auto_request_ggl_cnt if auto_request_ggl_cnt > 0 else 0
  168. auto_all_get_cnt = auto_res_cnt['all_get_cnt']
  169. auto_all_get_pct = 100 * auto_all_get_cnt / auto_trace_cnt if auto_trace_cnt > 0 else 0
  170. auto_all_get_pct = 100 * auto_all_get_cnt / auto_trace_cnt if auto_trace_cnt > 0 else 0
  171. auto_ctc_shh_cnt = auto_ctc_cnt['shh_ctc_cnt']
  172. auto_ctc_snv_cnt = auto_ctc_cnt['snv_ctc_cnt']
  173. auto_ctc_ggl_cnt = auto_ctc_cnt['ggl_ctc_cnt']
  174. auto_user_cnt = get_auto_user_cnt(dt)
  175. msg = f'''【手动/自动更新效果统计】------------------------------------------
  176. 统计日期: {format_dt}
  177. 1、手动更新
  178. ①手动更新请求总人数:{manual_user_cnt}人
  179. ②手动更新请求总次数:{manual_trace_cnt}次
  180. ③人均请求次数:{manual_trace_avg:.2f}次
  181. ④手动请求bing网址总次数:{manual_web_request_cnt}次
  182. ⑤bing获取到网址的次数及占比:{manual_web_get_cnt}次,{manual_web_get_pct:.2f}%
  183. ⑥单接口获取到联系人次数及占比:{manual_shh_get_cnt}次,{manual_shh_get_pct:.2f}%
  184. ⑦单接口获取到联系人去重总数:{manual_ctc_shh_cnt}
  185. ⑧snovio接口获取到联系人次数及占比:{manual_snv_get_cnt}次,{manual_snv_get_pct:.2f}%
  186. ⑨snovio接口获取到联系人去重总数:{manual_ctc_snv_cnt}
  187. ⑩google爬虫获取到联系人次数及占比:{manual_ggl_get_cnt}次,{manual_ggl_get_pct:.2f}%
  188. ⑪google爬虫获取到联系人去重总数:{manual_ctc_ggl_cnt}
  189. ⑫当日手动更新获得联系方式的总次数:{manual_all_get_cnt}
  190. ⑬当日手动更新解决联系人问题的百分比:{manual_all_get_pct:.2f}%
  191. 2、自动更新
  192. ① 自动更新请求总人数:{auto_user_cnt}人
  193. ② 自动更新请求总次数:{auto_web_request_cnt}次
  194. ③ 人均请求次数:{auto_trace_avg:.2f}次
  195. ④ 自动请求bing网址总次数:{auto_web_request_cnt}次
  196. ⑤ bing获取到网址的次数及占比:{auto_web_get_cnt}次,{auto_web_get_pct:.2f}%
  197. ⑥ 自动请求单接口的总次数:{auto_request_shh_cnt} 次
  198. ⑦ 单接口获取到联系人次数及占比:{auto_shh_get_cnt}次,{auto_shh_get_pct:.2f}%
  199. ⑧ 单接口获取到联系人去重总数:{auto_ctc_shh_cnt}
  200. ⑨ 自动请求snovio接口的总次数:{auto_request_snv_cnt} 次
  201. ⑩ snovio接口获取到联系人次数及占比:{auto_snv_get_cnt}次,{auto_snv_get_pct:.2f}%
  202. ⑪ snovio接口获取到联系人去重总数:{auto_ctc_snv_cnt}
  203. ⑫ 自动请求google爬虫的总次数:{auto_request_ggl_cnt} 次
  204. ⑬ google爬虫获取到联系人次数及占比:{auto_ggl_get_cnt}次,{auto_ggl_get_pct:.2f}%
  205. ⑭ google爬虫获取到联系人去重总数:{auto_ctc_ggl_cnt}
  206. ⑮ 当日自动更新请求联系方式的总次数:{auto_trace_cnt}
  207. ⑯ 当日自动更新获得联系方式的总次数:{auto_all_get_cnt}
  208. ⑰ 当日自动更新解决联系人问题的百分比:{auto_all_get_pct:.2f}%
  209. --------------------------------------------------------------- '''
  210. print(msg)
  211. send_dingtalk_notification(msg)