فهرست منبع

feat(ods): 埋点 ods 解析表 ods_usr_traces_apd_d DDL + 解析 SQL

tianyu.chu 4 روز پیش
والد
کامیت
429446cd1e
2فایلهای تغییر یافته به همراه98 افزوده شده و 0 حذف شده
  1. 47 0
      jobs/ods/usr/ods_usr_traces_apd_d.sql
  2. 51 0
      manual/ddl/ods/usr/ods_usr_traces_apd_d_create.sql

+ 47 - 0
jobs/ods/usr/ods_usr_traces_apd_d.sql

@@ -0,0 +1,47 @@
+-- Author: tianyu.chu
+-- Date: 2026-06-10
+-- Ticket: (none)
+-- Purpose: tracking events raw -> ods. Parses the desensitized _source JSON held in raw_json,
+--          flattens the common properties into columns, keeps the event-specific params as
+--          params_json, and overwrites the static partition for event day dt.
+-- Status: [pending execution]
+-- Notes: ES indexes are split by event day and raw.dt = event day, so dt is written statically
+--        as ${dt} (no dynamic partitioning, which avoids the dynamic-overwrite risk);
+--        events are immutable: no dual-source union / no update_time dedup; es_id is unique
+--        within a single file; time zone follows the cluster (UTC+8).
+-- Implementation notes:
+--   * The SELECT list has no target column list, so its order must stay aligned with the column
+--     order of ods.ods_usr_traces_apd_d (see manual/ddl/ods/usr/ods_usr_traces_apd_d_create.sql).
+--   * time / flushTime are treated as epoch milliseconds: they are divided by 1000 before the
+--     cast to TIMESTAMP. NOTE(review): how a numeric -> TIMESTAMP cast applies the session time
+--     zone differs between engines and versions; confirm the result is UTC+8 wall-clock time.
+--   * Boolean flags are mapped with an explicit CASE instead of CAST(... AS BOOLEAN).
+--     get_json_object returns a STRING, and string -> BOOLEAN casting is engine-dependent
+--     (Hive documents that any non-empty string, including 'false', casts to TRUE).
+--     Mapping: 'true'/'1' -> TRUE, 'false'/'0' -> FALSE (case-insensitive), anything else -> NULL.
+
+INSERT OVERWRITE TABLE ods.ods_usr_traces_apd_d PARTITION (dt = '${dt}')
+SELECT
+    es_id                                                                              AS es_id,
+    event_name                                                                         AS event_name,
+    get_json_object(raw_json, '$.type')                                                AS evt_type,
+    CAST(CAST(get_json_object(raw_json, '$.time')      AS BIGINT) / 1000 AS TIMESTAMP) AS event_time,
+    CAST(CAST(get_json_object(raw_json, '$.flushTime') AS BIGINT) / 1000 AS TIMESTAMP) AS flush_time,
+    get_json_object(raw_json, '$.loginId')                                             AS login_id,
+    get_json_object(raw_json, '$.distinctId')                                          AS distinct_id,
+    get_json_object(raw_json, '$.anonymousId')                                         AS anonymous_id,
+    get_json_object(raw_json, '$.properties.userId')                                   AS user_id,
+    get_json_object(raw_json, '$.properties.userName')                                 AS user_name,
+    get_json_object(raw_json, '$.properties.userLevel')                                AS user_lvl,
+    get_json_object(raw_json, '$.lib.lib')                                             AS lib,
+    get_json_object(raw_json, '$.lib.libVersion')                                      AS lib_version,
+    get_json_object(raw_json, '$.lib.libMethod')                                       AS lib_method,
+    get_json_object(raw_json, '$.properties.appId')                                    AS app_id,
+    get_json_object(raw_json, '$.properties.appName')                                  AS app_name,
+    get_json_object(raw_json, '$.properties.appVersion')                               AS app_version,
+    get_json_object(raw_json, '$.properties.wgtVersion')                               AS wgt_version,
+    get_json_object(raw_json, '$.properties.os')                                       AS os,
+    get_json_object(raw_json, '$.properties.osVersion')                                AS os_version,
+    get_json_object(raw_json, '$.properties.manufacturer')                             AS manufacturer,
+    get_json_object(raw_json, '$.properties.brand')                                    AS brand,
+    get_json_object(raw_json, '$.properties.model')                                    AS model,
+    get_json_object(raw_json, '$.properties.deviceId')                                 AS device_id,
+    get_json_object(raw_json, '$.properties.networkType')                              AS network_type,
+    get_json_object(raw_json, '$.properties.carrier')                                  AS carrier,
+    CASE LOWER(get_json_object(raw_json, '$.properties.wifi'))
+        WHEN 'true'  THEN TRUE
+        WHEN '1'     THEN TRUE
+        WHEN 'false' THEN FALSE
+        WHEN '0'     THEN FALSE
+    END                                                                                AS wifi,
+    CAST(get_json_object(raw_json, '$.properties.screenWidth')          AS BIGINT)      AS screen_width,
+    CAST(get_json_object(raw_json, '$.properties.screenHeight')         AS BIGINT)      AS screen_height,
+    CAST(get_json_object(raw_json, '$.properties.timezoneOffset')       AS BIGINT)      AS timezone_offset,
+    CASE LOWER(get_json_object(raw_json, '$.properties.isFirstDay'))
+        WHEN 'true'  THEN TRUE
+        WHEN '1'     THEN TRUE
+        WHEN 'false' THEN FALSE
+        WHEN '0'     THEN FALSE
+    END                                                                                AS is_first_day,
+    CASE LOWER(get_json_object(raw_json, '$.properties.isFirstTime'))
+        WHEN 'true'  THEN TRUE
+        WHEN '1'     THEN TRUE
+        WHEN 'false' THEN FALSE
+        WHEN '0'     THEN FALSE
+    END                                                                                AS is_first_time,
+    CASE LOWER(get_json_object(raw_json, '$.properties.resumeFromBackground'))
+        WHEN 'true'  THEN TRUE
+        WHEN '1'     THEN TRUE
+        WHEN 'false' THEN FALSE
+        WHEN '0'     THEN FALSE
+    END                                                                                AS resume_from_background,
+    CAST(get_json_object(raw_json, '$.properties.eventDuration')        AS BIGINT)      AS event_duration,
+    get_json_object(raw_json, '$.properties.params')                                   AS params_json
+FROM raw.raw_usr_traces_apd_d
+WHERE dt = '${dt}';

+ 51 - 0
manual/ddl/ods/usr/ods_usr_traces_apd_d_create.sql

@@ -0,0 +1,51 @@
+-- Author: tianyu.chu
+-- Date: 2026-06-10
+-- Ticket: (none)
+-- Purpose: tracking events raw -> ods. Parses the desensitized _source JSON: common properties
+--          are flattened into typed columns, params stay semi-structured (params_json);
+--          the table is partitioned by event day dt.
+-- Status: [pending execution]
+-- Notes: special case for the tracking ods layer - this is JSON parsing, not type restoration
+--        from a business database (side note at kb/20 §0.2); params are not flattened per event
+--        (event explosion, see ADR-13);
+--        web-only fields (platform/merchantId/title etc.) and lib_detail are not flattened,
+--        look them up in raw raw_json; companion parsing SQL: jobs/ods/usr/ods_usr_traces_apd_d.sql
+-- NOTE(review): the table is EXTERNAL, so DROP TABLE presumably leaves the files under LOCATION
+--        in place while discarding the partition metadata; re-running this script against a
+--        populated table would then need the partitions re-registered - confirm before executing.
+
+DROP TABLE IF EXISTS ods.ods_usr_traces_apd_d;
+
+CREATE EXTERNAL TABLE IF NOT EXISTS ods.ods_usr_traces_apd_d (
+    es_id                   STRING    COMMENT 'ES 文档 _id,去重键',
+    event_name              STRING    COMMENT '事件名(_source.event)',
+    evt_type                STRING    COMMENT '事件类型(track/track_signup)',
+    event_time              TIMESTAMP COMMENT '事件发生时间(_source.time 毫秒转,东八区)',
+    flush_time              TIMESTAMP COMMENT 'SDK 上报时间(_source.flushTime)',
+    login_id                STRING    COMMENT '登录用户 ID(未登录=0)',
+    distinct_id             STRING    COMMENT 'SDK 唯一标识',
+    anonymous_id            STRING    COMMENT '匿名设备 UUID',
+    user_id                 STRING    COMMENT '业务用户 ID(properties.userId)',
+    user_name               STRING    COMMENT '用户昵称',
+    user_lvl                STRING    COMMENT '用户等级',
+    lib                     STRING    COMMENT 'SDK 平台(iOS/Android/js)',
+    lib_version             STRING    COMMENT 'SDK 版本',
+    lib_method              STRING    COMMENT '埋点触发方式',
+    app_id                  STRING    COMMENT 'App 包名',
+    app_name                STRING    COMMENT 'App 名称',
+    app_version             STRING    COMMENT 'App 版本',
+    wgt_version             STRING    COMMENT '热更新版本',
+    os                      STRING    COMMENT '操作系统',
+    os_version              STRING    COMMENT '系统版本',
+    manufacturer            STRING    COMMENT '设备制造商',
+    brand                   STRING    COMMENT '设备品牌(Android)',
+    model                   STRING    COMMENT '设备型号',
+    device_id               STRING    COMMENT '设备 ID',
+    network_type            STRING    COMMENT '网络类型',
+    carrier                 STRING    COMMENT '运营商',
+    wifi                    BOOLEAN   COMMENT '是否 WIFI',
+    screen_width            BIGINT    COMMENT '屏幕宽',
+    screen_height           BIGINT    COMMENT '屏幕高',
+    timezone_offset         BIGINT    COMMENT '时区偏移分钟(×-1)',
+    is_first_day            BOOLEAN   COMMENT '是否首日',
+    is_first_time           BOOLEAN   COMMENT '是否首次启动($AppStart)',
+    resume_from_background  BOOLEAN   COMMENT '是否后台恢复($AppStart)',
+    event_duration          BIGINT    COMMENT '事件时长($AppEnd)',
+    params_json             STRING    COMMENT '事件专属 params(脱敏后 JSON,不 per-event 拍平)'
+)
+COMMENT '埋点 ods 层(解析拍平,params 半结构化)'
+PARTITIONED BY (dt STRING COMMENT 'yyyymmdd,事件日')
+STORED AS ORC
+LOCATION '/user/hive/warehouse/ods.db/ods_usr_traces_apd_d';