| 123456789101112131415161718192021 |
-- Author:  tianyu.chu
-- Date:    2026-06-10
-- Ticket:  (none)
-- Purpose: tracking-event gz files (already `hdfs put` into a temporary directory)
--          -> parse _source, mask properties
--          -> write the thin raw table (es_id / event_name / masked _source JSON)
-- Status:  [pending execution]
-- Notes:   the gz files are put, one day at a time, into /tmp/raw_usr_traces/${dt}/ by the
--          wrapper script jobs/raw/usr/raw_usr_traces_apd_d.py;
--          mask_source UDF = dw_base/udf/business/spark_traces_udf.py (loaded with -u);
--          the masking configuration is distributed through ADD FILE;
--          es_id / event_name are not sensitive and are read with the built-in
--          get_json_object; dt is static = the day of the files.

-- Distribute the masking configuration named in the header notes.
-- NOTE(review): the path is relative, so it presumably resolves against the directory the
-- job is launched from -- confirm with the wrapper script.
ADD FILE conf/tracking-mask.ini;

-- Expose the staged files of the day as a temporary view. The `text` data source yields a
-- single string column named `value`, one row per input line.
CREATE OR REPLACE TEMPORARY VIEW traces_gz_text
USING text
OPTIONS (path '/tmp/raw_usr_traces/${dt}/');

-- Rewrite the whole dt partition from the staged lines.
-- NOTE(review): there is no column list, so the three SELECT expressions are matched to the
-- target table by position, not by alias -- confirm the order against the table DDL.
-- NOTE(review): get_json_object returns NULL for a line that is not valid JSON; how
-- mask_source treats such a line is not visible here -- verify in the UDF.
INSERT OVERWRITE TABLE raw.raw_usr_traces_apd_d PARTITION (dt = '${dt}')
SELECT
    get_json_object(value, '$._id')           AS es_id,
    get_json_object(value, '$._source.event') AS event_name,
    mask_source(value)                        AS raw_json
FROM traces_gz_text;
|