| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116 |
- -- 作者:tianyu.chu
- -- 日期:2026-05-09
- -- 工单:(无)
- -- 目的:dim_usr_user_ful_d 日常增量(kb/28 §1.3):
- -- 昨日 dim (dt=${pdt}) + 今日 ods 增量 (dt=${dt}) 按 user_id 合并去重,落 dt=${dt} 单分区
- -- 状态:[草案]
- -- 备注:sched=T,${dt}=T-1,${pdt}=T-2;
- -- (a) 找今日变更 user_id 集合(base 或 cert 任一变)
- -- (b) 这些 user_id 扫 ods dt<=${dt} 重新取最新 base + cert(兼顾 base 没变只 cert 变 / cert 没变只 base 变)
- -- (c) 昨日 dim 中没变的 user_id 直接保留
- -- (d) UNION ALL 写入 dim dt=${dt} 单分区;
- -- cert_info update_time 几乎全空,ORDER BY 用 COALESCE(update_time, create_time);
- -- 前置 DS DEPENDENT:ods_usr_app_base_user_inc_d.dt=${dt} + ods_usr_app_user_cert_info_inc_d.dt=${dt} + dim_usr_user_ful_d.dt=${pdt}
- INSERT OVERWRITE TABLE dim.dim_usr_user_ful_d PARTITION (dt='${dt}')
- WITH today_changed_users AS (
- -- 今日变更的 user_id 集合(base 或 cert 任一变)
- SELECT DISTINCT id AS user_id FROM ods.ods_usr_app_base_user_inc_d WHERE dt = '${dt}'
- UNION
- SELECT DISTINCT user_id FROM ods.ods_usr_app_user_cert_info_inc_d WHERE dt = '${dt}'
- ),
- today_base_latest AS (
- -- 今日变更 user 的最新 base 版本(扫 ods <= ${dt},只取 today_changed_users 集合)
- SELECT *
- FROM (
- SELECT *,
- ROW_NUMBER() OVER (
- PARTITION BY id
- ORDER BY COALESCE(update_time, create_time) DESC
- ) AS rn
- FROM ods.ods_usr_app_base_user_inc_d
- WHERE dt <= '${dt}'
- AND id IN (SELECT user_id FROM today_changed_users)
- ) t
- WHERE t.rn = 1
- AND (is_deleted IS NULL OR is_deleted = FALSE)
- ),
- today_cert_latest AS (
- -- 今日变更 user 的最新 cert 版本
- SELECT *
- FROM (
- SELECT *,
- ROW_NUMBER() OVER (
- PARTITION BY user_id
- ORDER BY COALESCE(update_time, create_time) DESC
- ) AS rn
- FROM ods.ods_usr_app_user_cert_info_inc_d
- WHERE dt <= '${dt}'
- AND user_id IN (SELECT user_id FROM today_changed_users)
- ) t
- WHERE t.rn = 1
- AND (is_deleted IS NULL OR is_deleted = FALSE)
- ),
- today_dim_rebuild AS (
- -- 今日变更 user 重 join 形成新 dim 行
- SELECT
- bu.id AS user_id,
- bu.appid AS appid,
- bu.username AS username,
- bu.code AS code,
- ci.cert_sex AS sex_cert,
- ci.cert_birthday AS birthday_cert,
- ci.cert_province AS cert_province,
- ci.cert_city AS cert_city,
- CASE WHEN ci.user_id IS NOT NULL THEN TRUE ELSE FALSE END AS is_cert,
- bu.id_card AS id_card,
- bu.face_verify AS face_verify,
- bu.cancel_verify_num AS cancel_verify_num,
- bu.register_channel AS register_channel,
- bu.register_addr AS register_addr,
- bu.register_ip_addr AS register_ip_addr,
- bu.create_time AS reg_create_time,
- bu.login_addr AS login_addr,
- bu.login_ip_addr AS login_ip_addr,
- bu.level AS level,
- bu.member_level AS member_level,
- bu.member_name AS member_name,
- bu.growth_num AS growth_num,
- bu.current_month_growth AS current_month_growth,
- bu.member_keep_growth AS member_keep_growth,
- bu.member_init_flag AS member_init_flag,
- bu.point AS point,
- bu.consume_amount AS consume_amount_cny,
- bu.order_total_num AS order_total_num,
- bu.status AS status,
- bu.blacklist AS blacklist,
- bu.refuse_pick_up AS refuse_pick_up,
- bu.notify_flag AS notify_flag,
- bu.open_invoice AS open_invoice,
- bu.open_psd AS open_psd,
- bu.daily_limit AS daily_limit,
- bu.weekly_limit AS weekly_limit,
- bu.monthly_limit AS monthly_limit,
- bu.update_time AS last_update_time,
- bu.is_deleted AS is_deleted,
- CURRENT_TIMESTAMP() AS etl_time
- FROM today_base_latest bu
- LEFT JOIN today_cert_latest ci ON bu.id = ci.user_id
- ),
- yesterday_dim_unchanged AS (
- -- 昨日 dim 中今日没变的 user_id 直接保留
- SELECT
- user_id, appid, username, code, sex_cert, birthday_cert, cert_province, cert_city, is_cert,
- id_card, face_verify, cancel_verify_num, register_channel, register_addr, register_ip_addr,
- reg_create_time, login_addr, login_ip_addr, level, member_level, member_name,
- growth_num, current_month_growth, member_keep_growth, member_init_flag, point,
- consume_amount_cny, order_total_num, status, blacklist, refuse_pick_up, notify_flag,
- open_invoice, open_psd, daily_limit, weekly_limit, monthly_limit,
- last_update_time, is_deleted, etl_time
- FROM dim.dim_usr_user_ful_d
- WHERE dt = '${pdt}'
- AND user_id NOT IN (SELECT user_id FROM today_changed_users)
- )
- SELECT * FROM today_dim_rebuild
- UNION ALL
- SELECT * FROM yesterday_dim_unchanged;
|