dim_usr_user_ful_d.sql 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. -- 作者:tianyu.chu
  2. -- 日期:2026-05-09
  3. -- 工单:(无)
  4. -- 目的:dim_usr_user_ful_d 日常增量(kb/28 §1.3):
  5. -- 昨日 dim (dt=${pdt}) + 今日 ods 增量 (dt=${dt}) 按 user_id 合并去重,落 dt=${dt} 单分区
  6. -- 状态:[草案]
  7. -- 备注:sched=T,${dt}=T-1,${pdt}=T-2;
  8. -- (a) 找今日变更 user_id 集合(base 或 cert 任一变)
  9. -- (b) 这些 user_id 扫 ods dt<=${dt} 重新取最新 base + cert(兼顾 base 没变只 cert 变 / cert 没变只 base 变)
  10. -- (c) 昨日 dim 中没变的 user_id 直接保留
  11. -- (d) UNION ALL 写入 dim dt=${dt} 单分区;
  12. -- cert_info update_time 几乎全空,ORDER BY 用 COALESCE(update_time, create_time);
  13. -- 前置 DS DEPENDENT:ods_usr_app_base_user_inc_d.dt=${dt} + ods_usr_app_user_cert_info_inc_d.dt=${dt} + dim_usr_user_ful_d.dt=${pdt}
  14. INSERT OVERWRITE TABLE dim.dim_usr_user_ful_d PARTITION (dt='${dt}')
  15. WITH today_changed_users AS (
  16. -- 今日变更的 user_id 集合(base 或 cert 任一变)
  17. SELECT DISTINCT id AS user_id FROM ods.ods_usr_app_base_user_inc_d WHERE dt = '${dt}'
  18. UNION
  19. SELECT DISTINCT user_id FROM ods.ods_usr_app_user_cert_info_inc_d WHERE dt = '${dt}'
  20. ),
  21. today_base_latest AS (
  22. -- 今日变更 user 的最新 base 版本(扫 ods <= ${dt},只取 today_changed_users 集合)
  23. SELECT *
  24. FROM (
  25. SELECT *,
  26. ROW_NUMBER() OVER (
  27. PARTITION BY id
  28. ORDER BY COALESCE(update_time, create_time) DESC
  29. ) AS rn
  30. FROM ods.ods_usr_app_base_user_inc_d
  31. WHERE dt <= '${dt}'
  32. AND id IN (SELECT user_id FROM today_changed_users)
  33. ) t
  34. WHERE t.rn = 1
  35. ),
  36. today_cert_latest AS (
  37. -- 今日变更 user 的最新 cert 版本
  38. SELECT *
  39. FROM (
  40. SELECT *,
  41. ROW_NUMBER() OVER (
  42. PARTITION BY user_id
  43. ORDER BY COALESCE(update_time, create_time) DESC
  44. ) AS rn
  45. FROM ods.ods_usr_app_user_cert_info_inc_d
  46. WHERE dt <= '${dt}'
  47. AND user_id IN (SELECT user_id FROM today_changed_users)
  48. ) t
  49. WHERE t.rn = 1
  50. ),
  51. today_dim_rebuild AS (
  52. -- 今日变更 user 重 join 形成新 dim 行
  53. SELECT
  54. bu.id AS user_id,
  55. bu.appid AS appid,
  56. bu.username AS username,
  57. bu.code AS code,
  58. ci.cert_sex AS sex_cert,
  59. ci.cert_birthday AS birthday_cert,
  60. ci.cert_province AS cert_province,
  61. ci.cert_city AS cert_city,
  62. CASE WHEN ci.user_id IS NOT NULL THEN TRUE ELSE FALSE END AS is_cert,
  63. bu.id_card AS id_card,
  64. bu.face_verify AS face_verify,
  65. bu.cancel_verify_num AS cancel_verify_num,
  66. bu.register_channel AS register_channel,
  67. bu.register_addr AS register_addr,
  68. bu.register_ip_addr AS register_ip_addr,
  69. bu.create_time AS reg_create_time,
  70. bu.login_addr AS login_addr,
  71. bu.login_ip_addr AS login_ip_addr,
  72. bu.level AS level,
  73. bu.member_level AS member_level,
  74. bu.member_name AS member_name,
  75. bu.growth_num AS growth_num,
  76. bu.current_month_growth AS current_month_growth,
  77. bu.member_keep_growth AS member_keep_growth,
  78. bu.member_init_flag AS member_init_flag,
  79. bu.point AS point,
  80. bu.consume_amount AS consume_amount_cny,
  81. bu.order_total_num AS order_total_num,
  82. bu.status AS status,
  83. bu.blacklist AS blacklist,
  84. bu.refuse_pick_up AS refuse_pick_up,
  85. bu.notify_flag AS notify_flag,
  86. bu.open_invoice AS open_invoice,
  87. bu.open_psd AS open_psd,
  88. bu.daily_limit AS daily_limit,
  89. bu.weekly_limit AS weekly_limit,
  90. bu.monthly_limit AS monthly_limit,
  91. bu.update_time AS last_update_time,
  92. bu.is_deleted AS is_deleted,
  93. CURRENT_TIMESTAMP() AS etl_time
  94. FROM today_base_latest bu
  95. LEFT JOIN today_cert_latest ci ON bu.id = ci.user_id
  96. ),
  97. yesterday_dim_unchanged AS (
  98. -- 昨日 dim 中今日没变的 user_id 直接保留
  99. SELECT
  100. user_id, appid, username, code, sex_cert, birthday_cert, cert_province, cert_city, is_cert,
  101. id_card, face_verify, cancel_verify_num, register_channel, register_addr, register_ip_addr,
  102. reg_create_time, login_addr, login_ip_addr, level, member_level, member_name,
  103. growth_num, current_month_growth, member_keep_growth, member_init_flag, point,
  104. consume_amount_cny, order_total_num, status, blacklist, refuse_pick_up, notify_flag,
  105. open_invoice, open_psd, daily_limit, weekly_limit, monthly_limit,
  106. last_update_time, is_deleted, etl_time
  107. FROM dim.dim_usr_user_ful_d
  108. WHERE dt = '${pdt}'
  109. AND user_id NOT IN (SELECT user_id FROM today_changed_users)
  110. )
  111. SELECT * FROM today_dim_rebuild
  112. UNION ALL
  113. SELECT * FROM yesterday_dim_unchanged;