Эх сурвалжийг харах

feat(dws/usr): dws_usr_user_trade_1d DDL + init + sched(用户 x 品类 x 日 交易宽表 + N=2 回算)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
tianyu.chu 3 өдөр өмнө
parent
commit
805af0eeef

+ 37 - 0
jobs/dws/usr/dws_usr_user_trade_1d.sql

@@ -0,0 +1,37 @@
+-- Author: tianyu.chu
+-- Date: 2026-05-10
+-- Ticket: (none)
+-- Purpose: daily incremental load of dws_usr_user_trade_1d (kb/29 §1.4 + §2):
+--      recompute the latest 2 days (aligned with dwd N=2, drift chain compensation, kb/93 ADR-09): scan dwd.dt IN (${dt}, ${pdt}) +
+--      filter category IS NOT NULL (dirty value "other" is already cleansed to NULL in DIM, kb/28 §3.2) +
+--      aggregate with GROUP BY (user_id, category, dt) +
+--      dynamic-partition write into dws.dt IN (${dt}, ${pdt})
+-- Status: [draft]
+-- Notes: sched=T, ${dt}=T-1, ${pdt}=T-2.
+--      Recomputing N=2 days is the safety net for cross-midnight drift chain compensation (in step with dwd, see kb/93 ADR-09).
+--      INSERT OVERWRITE with dynamic partitions (kb/26 §8, project default DYNAMIC mode): only dt partitions that appear in the SELECT output are overwritten, other historical partitions are untouched.
+--      Upstream DS DEPENDENT: dwd_trd_order_pay_apd_d.${dt}
+
+INSERT OVERWRITE TABLE dws.dws_usr_user_trade_1d PARTITION (dt)
+SELECT
+    pay.user_id                            AS user_id,
+    pay.category                           AS category,
+    COUNT(DISTINCT pay.order_id)           AS pay_order_cnt,
+    SUM(pay.purchase_cnt)                  AS purchase_cnt,
+    SUM(pay.payable_amt_cny)               AS payable_amt_cny,
+    SUM(pay.pay_amt_cny)                   AS pay_amt_cny,
+    SUM(pay.trade_amt_cny)                 AS trade_amt_cny,
+    SUM(pay.settle_amt_cny)                AS settle_amt_cny,
+    SUM(pay.merchant_discount_amt_cny)     AS merchant_discount_amt_cny,
+    SUM(pay.platform_discount_amt_cny)     AS platform_discount_amt_cny,
+    SUM(pay.member_discount_amt_cny)       AS member_discount_amt_cny,
+    SUM(pay.act_discount_amt_cny)          AS act_discount_amt_cny,
+    SUM(pay.point_deduct_amt_cny)          AS point_deduct_amt_cny,
+    SUM(pay.shipping_amt_cny)              AS shipping_amt_cny,
+    SUM(pay.point)                         AS point,
+    CURRENT_TIMESTAMP()                    AS etl_time,
+    pay.dt                                 AS dt
+FROM dwd.dwd_trd_order_pay_apd_d AS pay
+WHERE pay.dt IN ('${dt}', '${pdt}')
+  AND pay.category IS NOT NULL
+GROUP BY pay.user_id, pay.category, pay.dt;

+ 39 - 0
manual/backfill/20260510_dws_usr_user_trade_1d_init.sql

@@ -0,0 +1,39 @@
+-- Author: tianyu.chu
+-- Date: 2026-05-10
+-- Ticket: (none)
+-- Purpose: one-off initialization of dws_usr_user_trade_1d (kb/29 §2):
+--      scan every historical partition of dwd_trd_order_pay_apd_d +
+--      filter category IS NOT NULL (dirty value "other" is already cleansed to NULL in DIM and is excluded from aggregation, kb/28 §3.2) +
+--      aggregate with GROUP BY (user_id, category, dwd.dt) +
+--      dynamic-partition write into dws dt (inherits the dwd business-time partition)
+-- Status: [pending execution]
+-- Notes: run once, after which jobs/dws/usr/dws_usr_user_trade_1d.sql takes over the daily incremental load.
+--      dt is passed straight through from dwd.dt (dwd is already partitioned by DATE(payment_success_time), so dws applies no DATE_FORMAT).
+--      Historical business time spans 2021-10 ~ ${dt}, so both dynamic-partition limits are SET to 2000. The per-node limit must match the total because the GROUP BY shuffle is not keyed by dt alone and any single reducer can receive rows for every dt.
+--      Prerequisite: dwd_trd_order_pay_apd_d_init has finished (full history already landed in dwd).
+
+set hive.exec.max.dynamic.partitions=2000;
+set hive.exec.max.dynamic.partitions.pernode=2000;
+
+INSERT OVERWRITE TABLE dws.dws_usr_user_trade_1d PARTITION (dt)
+SELECT
+    user_id                                                  AS user_id,
+    category                                                 AS category,
+    COUNT(DISTINCT order_id)                                 AS pay_order_cnt,
+    SUM(purchase_cnt)                                        AS purchase_cnt,
+    SUM(payable_amt_cny)                                     AS payable_amt_cny,
+    SUM(pay_amt_cny)                                         AS pay_amt_cny,
+    SUM(trade_amt_cny)                                       AS trade_amt_cny,
+    SUM(settle_amt_cny)                                      AS settle_amt_cny,
+    SUM(merchant_discount_amt_cny)                           AS merchant_discount_amt_cny,
+    SUM(platform_discount_amt_cny)                           AS platform_discount_amt_cny,
+    SUM(member_discount_amt_cny)                             AS member_discount_amt_cny,
+    SUM(act_discount_amt_cny)                                AS act_discount_amt_cny,
+    SUM(point_deduct_amt_cny)                                AS point_deduct_amt_cny,
+    SUM(shipping_amt_cny)                                    AS shipping_amt_cny,
+    SUM(point)                                               AS point,
+    CURRENT_TIMESTAMP()                                      AS etl_time,
+    dt                                                       AS dt
+FROM dwd.dwd_trd_order_pay_apd_d
+WHERE category IS NOT NULL
+GROUP BY user_id, category, dt;

+ 36 - 0
manual/ddl/dws/usr/dws_usr_user_trade_1d_create.sql

@@ -0,0 +1,36 @@
+-- Author: tianyu.chu
+-- Date: 2026-05-10
+-- Ticket: (none)
+-- Purpose: user x category x day trade-subject wide table (kb/29 §2 dws_usr_user_trade_1d)
+-- Status: [draft]. NOTE(review): by default DROP on an EXTERNAL table leaves the files under LOCATION, and re-creating the table does not re-register existing dt partitions - confirm a partition repair or re-init step is planned if this is re-run after data has been loaded.
+-- Notes: grain (user_id, category, dt) is unique (kb/29 §2.2).
+--      Single source dwd_trd_order_pay_apd_d (A3 locks phase 1 to exclude refund, kb/29 §2.3).
+--      dt anchor = DATE(payment_success_time), a business-time partition inherited from dwd (kb/29 §2.4).
+--      Recompute window N=2 aligned with dwd (drift chain compensation, kb/93 ADR-09).
+--      No degenerate dimension attributes are denormalized (phase-1 scope serves tag computation, extend when the kb/29 §1.3 trigger condition is met).
+--      Column types align with dwd (integers BIGINT, amounts DECIMAL(20,4), kb/20 §8.4.1).
+
+DROP TABLE IF EXISTS dws.dws_usr_user_trade_1d;
+
+CREATE EXTERNAL TABLE IF NOT EXISTS dws.dws_usr_user_trade_1d (
+    user_id                        BIGINT         COMMENT '用户 id (PK 一,源 dwd.user_id)',
+    category                       STRING         COMMENT '叶子品类 (PK 二,源 dwd.category,DIM 已清洗权威源)',
+    pay_order_cnt                  BIGINT         COMMENT '当日支付订单数 偏好次数口径 COUNT(DISTINCT order_id)',
+    purchase_cnt                   BIGINT         COMMENT '当日支付份数 备用 SUM(purchase_cnt)',
+    payable_amt_cny                DECIMAL(20,4)  COMMENT '当日 GMV SUM(payable_amt_cny)',
+    pay_amt_cny                    DECIMAL(20,4)  COMMENT '当日 Net Revenue 偏好金额口径 SUM(pay_amt_cny)',
+    trade_amt_cny                  DECIMAL(20,4)  COMMENT '当日订单交易金额 SUM(trade_amt_cny)',
+    settle_amt_cny                 DECIMAL(20,4)  COMMENT '当日结算金额 SUM(settle_amt_cny)',
+    merchant_discount_amt_cny      DECIMAL(20,4)  COMMENT '当日商家折扣 SUM(merchant_discount_amt_cny)',
+    platform_discount_amt_cny      DECIMAL(20,4)  COMMENT '当日平台券折扣 SUM(platform_discount_amt_cny)',
+    member_discount_amt_cny        DECIMAL(20,4)  COMMENT '当日会员折扣 SUM(member_discount_amt_cny)',
+    act_discount_amt_cny           DECIMAL(20,4)  COMMENT '当日活动折扣 SUM(act_discount_amt_cny)',
+    point_deduct_amt_cny           DECIMAL(20,4)  COMMENT '当日积分抵扣金额 SUM(point_deduct_amt_cny)',
+    shipping_amt_cny               DECIMAL(20,4)  COMMENT '当日运费 SUM(shipping_amt_cny)',
+    point                          BIGINT         COMMENT '当日消耗积分 SUM(point)',
+    etl_time                       TIMESTAMP      COMMENT 'ETL 处理时间'
+)
+COMMENT '用户 x 品类 x 日 交易主题宽表'
+PARTITIONED BY (dt STRING)
+STORED AS ORC
+LOCATION '/user/hive/warehouse/dws.db/dws_usr_user_trade_1d';