Просмотр исходного кода

feat(dim/usr): dim_usr_user_ful_d DDL + init + sche(用户维度全量快照)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
tianyu.chu 2 дней назад
Родитель
Сommit
5a28815

+ 116 - 0
jobs/dim/usr/dim_usr_user_ful_d.sql

@@ -0,0 +1,116 @@
+-- 作者:tianyu.chu
+-- 日期:2026-05-09
+-- 工单:(无)
+-- 目的:dim_usr_user_ful_d 日常增量(kb/28 §1.3):
+--       昨日 dim (dt=${pdt}) + 今日 ods 增量 (dt=${dt}) 按 user_id 合并去重,落 dt=${dt} 单分区
+-- 状态:[草案]
+-- 备注:sched=T,${dt}=T-1,${pdt}=T-2;
+--       (a) 找今日变更 user_id 集合(base 或 cert 任一变)
+--       (b) 这些 user_id 扫 ods dt<=${dt} 重新取最新 base + cert(兼顾 base 没变只 cert 变 / cert 没变只 base 变)
+--       (c) 昨日 dim 中没变的 user_id 直接保留
+--       (d) UNION ALL 写入 dim dt=${dt} 单分区;
+--       cert_info update_time 几乎全空,ORDER BY 用 COALESCE(update_time, create_time);
+--       前置 DS DEPENDENT:ods_usr_app_base_user_inc_d.dt=${dt} + ods_usr_app_user_cert_info_inc_d.dt=${dt} + dim_usr_user_ful_d.dt=${pdt}
+
+-- Daily incremental build of dim.dim_usr_user_ful_d, written to the single partition dt='${dt}':
+--   1. collect the user_ids touched by today's ods increments (base_user or cert_info);
+--   2. rebuild those users from their latest non-deleted base / cert versions in ods (dt <= '${dt}');
+--   3. carry every other user forward unchanged from the previous snapshot (dt='${pdt}');
+--   4. UNION ALL both sets into the target partition.
+-- The WITH clause precedes INSERT: that is the placement the Hive CTE grammar documents,
+-- and Spark SQL accepts it as well.
+WITH today_changed_users AS (
+    -- user_ids changed today in either source. UNION already de-duplicates, so no DISTINCT is needed.
+    -- NULL keys are excluded: they can never match a real user and must not leak into
+    -- the membership tests further down.
+    SELECT id AS user_id
+    FROM ods.ods_usr_app_base_user_inc_d
+    WHERE dt = '${dt}'
+      AND id IS NOT NULL
+    UNION
+    SELECT user_id
+    FROM ods.ods_usr_app_user_cert_info_inc_d
+    WHERE dt = '${dt}'
+      AND user_id IS NOT NULL
+),
+today_base_latest AS (
+    -- Latest base_user version of each changed user, scanning every ods partition up to ${dt}.
+    -- dt DESC breaks ties between versions that share the same COALESCE(update_time, create_time),
+    -- so the row from the most recent partition wins deterministically.
+    SELECT *
+    FROM (
+        SELECT *,
+            ROW_NUMBER() OVER (
+                PARTITION BY id
+                ORDER BY COALESCE(update_time, create_time) DESC, dt DESC
+            ) AS rn
+        FROM ods.ods_usr_app_base_user_inc_d
+        WHERE dt <= '${dt}'
+          AND id IN (SELECT user_id FROM today_changed_users)
+    ) t
+    WHERE t.rn = 1
+      -- a user whose latest version is soft-deleted is dropped from the snapshot
+      AND (is_deleted IS NULL OR is_deleted = FALSE)
+),
+today_cert_latest AS (
+    -- Latest cert_info version of each changed user.
+    -- Per the file header, cert_info.update_time is almost always NULL, so create_time is the
+    -- usual sort key; dt DESC then picks the newest partition among otherwise tied versions.
+    SELECT *
+    FROM (
+        SELECT *,
+            ROW_NUMBER() OVER (
+                PARTITION BY user_id
+                ORDER BY COALESCE(update_time, create_time) DESC, dt DESC
+            ) AS rn
+        FROM ods.ods_usr_app_user_cert_info_inc_d
+        WHERE dt <= '${dt}'
+          AND user_id IN (SELECT user_id FROM today_changed_users)
+    ) t
+    WHERE t.rn = 1
+      AND (is_deleted IS NULL OR is_deleted = FALSE)
+),
+today_dim_rebuild AS (
+    -- Changed users re-joined into fresh dim rows; column order matches the target table.
+    SELECT
+        bu.id                              AS user_id,
+        bu.appid                           AS appid,
+        bu.username                        AS username,
+        bu.code                            AS code,
+        ci.cert_sex                        AS sex_cert,
+        ci.cert_birthday                   AS birthday_cert,
+        ci.cert_province                   AS cert_province,
+        ci.cert_city                       AS cert_city,
+        -- TRUE when a live (non-deleted) cert row exists for the user
+        CASE WHEN ci.user_id IS NOT NULL THEN TRUE ELSE FALSE END AS is_cert,
+        bu.id_card                         AS id_card,
+        bu.face_verify                     AS face_verify,
+        bu.cancel_verify_num               AS cancel_verify_num,
+        bu.register_channel                AS register_channel,
+        bu.register_addr                   AS register_addr,
+        bu.register_ip_addr                AS register_ip_addr,
+        bu.create_time                     AS reg_create_time,
+        bu.login_addr                      AS login_addr,
+        bu.login_ip_addr                   AS login_ip_addr,
+        bu.level                           AS level,
+        bu.member_level                    AS member_level,
+        bu.member_name                     AS member_name,
+        bu.growth_num                      AS growth_num,
+        bu.current_month_growth            AS current_month_growth,
+        bu.member_keep_growth              AS member_keep_growth,
+        bu.member_init_flag                AS member_init_flag,
+        bu.point                           AS point,
+        bu.consume_amount                  AS consume_amount_cny,
+        bu.order_total_num                 AS order_total_num,
+        bu.status                          AS status,
+        bu.blacklist                       AS blacklist,
+        bu.refuse_pick_up                  AS refuse_pick_up,
+        bu.notify_flag                     AS notify_flag,
+        bu.open_invoice                    AS open_invoice,
+        bu.open_psd                        AS open_psd,
+        bu.daily_limit                     AS daily_limit,
+        bu.weekly_limit                    AS weekly_limit,
+        bu.monthly_limit                   AS monthly_limit,
+        bu.update_time                     AS last_update_time,
+        bu.is_deleted                      AS is_deleted,
+        CURRENT_TIMESTAMP()                AS etl_time
+    FROM today_base_latest bu
+    LEFT JOIN today_cert_latest ci ON bu.id = ci.user_id
+),
+yesterday_dim_unchanged AS (
+    -- Users from the previous snapshot that did not change today, copied as they are
+    -- (including their original etl_time).
+    -- Written as an anti-join instead of NOT IN: NOT IN yields no rows at all as soon as the
+    -- subquery returns a single NULL, which would silently empty the carried-forward set.
+    SELECT
+        d.user_id, d.appid, d.username, d.code,
+        d.sex_cert, d.birthday_cert, d.cert_province, d.cert_city, d.is_cert,
+        d.id_card, d.face_verify, d.cancel_verify_num,
+        d.register_channel, d.register_addr, d.register_ip_addr,
+        d.reg_create_time, d.login_addr, d.login_ip_addr,
+        d.level, d.member_level, d.member_name,
+        d.growth_num, d.current_month_growth, d.member_keep_growth, d.member_init_flag, d.point,
+        d.consume_amount_cny, d.order_total_num,
+        d.status, d.blacklist, d.refuse_pick_up, d.notify_flag,
+        d.open_invoice, d.open_psd, d.daily_limit, d.weekly_limit, d.monthly_limit,
+        d.last_update_time, d.is_deleted, d.etl_time
+    FROM dim.dim_usr_user_ful_d d
+    LEFT JOIN today_changed_users c
+        ON d.user_id = c.user_id
+    WHERE d.dt = '${pdt}'
+      AND c.user_id IS NULL
+)
+INSERT OVERWRITE TABLE dim.dim_usr_user_ful_d PARTITION (dt='${dt}')
+-- Both CTEs list their columns in the same order, so the positional UNION ALL lines up.
+SELECT * FROM today_dim_rebuild
+UNION ALL
+SELECT * FROM yesterday_dim_unchanged;

+ 83 - 0
manual/backfill/20260509_dim_usr_user_ful_d_init.sql

@@ -0,0 +1,83 @@
+-- 作者:tianyu.chu
+-- 日期:2026-05-09
+-- 工单:(无)
+-- 目的:dim_usr_user_ful_d 初始化(kb/28 §1.3):扫 ods 全量历史分区 + ROW_NUMBER 取每 pk 最新版本,
+--       base_user LEFT JOIN cert_info 落 dim dt=${dt} 单分区
+-- 状态:[待执行]
+-- 备注:${dt} 由 DS 填最新可用 ods dt(首日分区);cert_info update_time 几乎全空,
+--       ORDER BY 用 COALESCE(update_time, create_time) 兜底;
+--       跑一次后由 jobs/dim/usr/dim_usr_user_ful_d.sql 接管日常增量
+
+-- 动态分区不需要(init 单分区写入),但首次会扫 ods 多 dt:
+-- ods.base_user 30 万行 × N dt + ods.cert_info 15 万行 × N dt,ROW_NUMBER 取最新即可
+
+-- One-off initialisation of dim.dim_usr_user_ful_d: scan every ods partition up to '${dt}',
+-- keep the latest non-deleted version per key, and write base_user LEFT JOIN cert_info
+-- into the single partition dt='${dt}'.
+-- The WITH clause precedes INSERT, the placement documented for Hive CTEs (Spark SQL accepts it too).
+WITH base_latest AS (
+    -- Latest base_user version per id.
+    -- dt DESC breaks ties between versions sharing the same COALESCE(update_time, create_time),
+    -- so the pick is deterministic and favours the most recent partition.
+    SELECT *
+    FROM (
+        SELECT *,
+            ROW_NUMBER() OVER (
+                PARTITION BY id
+                ORDER BY COALESCE(update_time, create_time) DESC, dt DESC
+            ) AS rn
+        FROM ods.ods_usr_app_base_user_inc_d
+        WHERE dt <= '${dt}'
+    ) t
+    WHERE t.rn = 1
+      -- users whose latest version is soft-deleted are left out of the snapshot
+      AND (is_deleted IS NULL OR is_deleted = FALSE)
+),
+cert_latest AS (
+    -- Latest cert_info version per user_id.
+    -- Per the file header, update_time is almost always NULL here, so create_time is the usual
+    -- sort key and dt DESC decides between otherwise tied versions.
+    SELECT *
+    FROM (
+        SELECT *,
+            ROW_NUMBER() OVER (
+                PARTITION BY user_id
+                ORDER BY COALESCE(update_time, create_time) DESC, dt DESC
+            ) AS rn
+        FROM ods.ods_usr_app_user_cert_info_inc_d
+        WHERE dt <= '${dt}'
+    ) t
+    WHERE t.rn = 1
+      AND (is_deleted IS NULL OR is_deleted = FALSE)
+)
+INSERT OVERWRITE TABLE dim.dim_usr_user_ful_d PARTITION (dt='${dt}')
+SELECT
+    bu.id                              AS user_id,
+    bu.appid                           AS appid,
+    bu.username                        AS username,
+    bu.code                            AS code,
+    ci.cert_sex                        AS sex_cert,
+    ci.cert_birthday                   AS birthday_cert,
+    ci.cert_province                   AS cert_province,
+    ci.cert_city                       AS cert_city,
+    -- TRUE when a live (non-deleted) cert row exists for the user
+    CASE WHEN ci.user_id IS NOT NULL THEN TRUE ELSE FALSE END AS is_cert,
+    bu.id_card                         AS id_card,
+    bu.face_verify                     AS face_verify,
+    bu.cancel_verify_num               AS cancel_verify_num,
+    bu.register_channel                AS register_channel,
+    bu.register_addr                   AS register_addr,
+    bu.register_ip_addr                AS register_ip_addr,
+    bu.create_time                     AS reg_create_time,
+    bu.login_addr                      AS login_addr,
+    bu.login_ip_addr                   AS login_ip_addr,
+    bu.level                           AS level,
+    bu.member_level                    AS member_level,
+    bu.member_name                     AS member_name,
+    bu.growth_num                      AS growth_num,
+    bu.current_month_growth            AS current_month_growth,
+    bu.member_keep_growth              AS member_keep_growth,
+    bu.member_init_flag                AS member_init_flag,
+    bu.point                           AS point,
+    bu.consume_amount                  AS consume_amount_cny,
+    bu.order_total_num                 AS order_total_num,
+    bu.status                          AS status,
+    bu.blacklist                       AS blacklist,
+    bu.refuse_pick_up                  AS refuse_pick_up,
+    bu.notify_flag                     AS notify_flag,
+    bu.open_invoice                    AS open_invoice,
+    bu.open_psd                        AS open_psd,
+    bu.daily_limit                     AS daily_limit,
+    bu.weekly_limit                    AS weekly_limit,
+    bu.monthly_limit                   AS monthly_limit,
+    bu.update_time                     AS last_update_time,
+    bu.is_deleted                      AS is_deleted,
+    CURRENT_TIMESTAMP()                AS etl_time
+FROM base_latest bu
+LEFT JOIN cert_latest ci ON bu.id = ci.user_id;

+ 57 - 0
manual/ddl/dim/usr/dim_usr_user_ful_d_create.sql

@@ -0,0 +1,57 @@
+-- 作者:tianyu.chu
+-- 日期:2026-05-09
+-- 工单:(无)
+-- 目的:用户维度 ful_d 全量快照建表(kb/28 §2 dim_usr_user_ful_d)
+-- 状态:[草案]
+-- 备注:base_user LEFT JOIN cert_info 合一;性别/生日仅取 cert(kb/28 §2.2);
+--       默认 ful_d,未来出"看历史属性变化"标签需求或全量快照存储 > 100GB 时迁 zip_d 双轨共存(kb/28 §1.2);
+--       字段类型对齐 ods(整数全 BIGINT,详见 kb/20 §8.4.1)
+
+-- Recreate the table definition from scratch so that schema edits in this file take effect.
+-- NOTE(review): the table is EXTERNAL, so with default Hive settings DROP removes only the
+-- metastore entry (schema and partition list); the ORC files under LOCATION are expected to
+-- remain. Confirm that external-table purge is not enabled before relying on this.
+DROP TABLE IF EXISTS dim.dim_usr_user_ful_d;
+
+-- User dimension, full daily snapshot: one row per user per dt partition.
+CREATE EXTERNAL TABLE IF NOT EXISTS dim.dim_usr_user_ful_d (
+    user_id                BIGINT         COMMENT '用户 id(PK,源 base_user.id)',
+    appid                  STRING         COMMENT '所属程序',
+    username               STRING         COMMENT '账号',
+    code                   STRING         COMMENT '会员码',
+    sex_cert               BIGINT         COMMENT '证件性别(源 cert_info.cert_sex)',
+    birthday_cert          TIMESTAMP      COMMENT '证件生日(源 cert_info.cert_birthday)',
+    cert_province          STRING         COMMENT '证件所在省',
+    cert_city              STRING         COMMENT '证件所在市',
+    is_cert                BOOLEAN        COMMENT '是否实名(cert_info 命中即 TRUE)',
+    id_card                STRING         COMMENT '身份证号(已 md5)',
+    face_verify            BIGINT         COMMENT '人脸识别通过标志',
+    cancel_verify_num      BIGINT         COMMENT '重置实名次数',
+    register_channel       STRING         COMMENT '注册渠道',
+    register_addr          STRING         COMMENT '注册省区',
+    register_ip_addr       STRING         COMMENT '注册 IP',
+    reg_create_time        TIMESTAMP      COMMENT '注册时间(源 base_user.create_time)',
+    login_addr             STRING         COMMENT '上次登陆省区',
+    login_ip_addr          STRING         COMMENT '上次登陆 IP',
+    level                  BIGINT         COMMENT '会员等级',
+    member_level           BIGINT         COMMENT '会员等级(业务库重复字段,先全保留待澄清)',
+    member_name            STRING         COMMENT '会员等级名称',
+    growth_num             BIGINT         COMMENT '成长值',
+    current_month_growth   BIGINT         COMMENT '当月成长值',
+    member_keep_growth     BIGINT         COMMENT '保级所需成长值',
+    member_init_flag       BIGINT         COMMENT '月初初始化标志',
+    point                  BIGINT         COMMENT '积分',
+    consume_amount_cny     DECIMAL(20,4)  COMMENT '业务库后端自带消费总额(口径未对齐数仓,保留作审计对账)',
+    order_total_num        BIGINT         COMMENT '业务库后端自带订单总数(同上)',
+    status                 BIGINT         COMMENT '用户状态',
+    blacklist              BIGINT         COMMENT '黑名单标记',
+    refuse_pick_up         BIGINT         COMMENT '是否拒绝自提',
+    notify_flag            BIGINT         COMMENT '推送是否接受',
+    open_invoice           BIGINT         COMMENT '开票权限',
+    open_psd               BIGINT         COMMENT '支付开关',
+    daily_limit            BIGINT         COMMENT '每日限额提醒',
+    weekly_limit           BIGINT         COMMENT '每周限额提醒',
+    monthly_limit          BIGINT         COMMENT '每月限额',
+    last_update_time       TIMESTAMP      COMMENT '最近更新时间(源 base_user.update_time)',
+    is_deleted             BOOLEAN        COMMENT '软删归一',
+    etl_time               TIMESTAMP      COMMENT 'ETL 处理时间'
+)
+COMMENT '用户维度全量快照表'
+PARTITIONED BY (dt STRING)
+STORED AS ORC
+LOCATION '/user/hive/warehouse/dim.db/dim_usr_user_ful_d';
+
+-- Re-register any dt=... directories already present under LOCATION. Without this, re-running
+-- the script on a populated table leaves it with no partitions in the metastore, and the daily
+-- job would then read an empty dt='${pdt}' snapshot. On a first-time create this is a no-op.
+MSCK REPAIR TABLE dim.dim_usr_user_ful_d;