|
|
@@ -1,19 +1,21 @@
|
|
|
-- 作者:tianyu.chu
|
|
|
--- 日期:2026-05-11
|
|
|
+-- 日期:2026-05-12
|
|
|
-- 工单:(无)
|
|
|
-- 目的:tdm_usr_tag_d 日常调度 + 手动 init 复用(kb/33 §2):
|
|
|
-- INSERT OVERWRITE PARTITION (dt='${dt}') 静态单分区全量重刷;
|
|
|
-- 7 属性(attr) ← dim_usr_user_ful_d.dt='${dt}' 各属性 UNION ALL;
|
|
|
--- 4 偏好窗口(stat) ← dws_usr_user_trade_1d 滚动 30d / y{当年} 累计聚合 UNION ALL;
|
|
|
+-- 4 消费明细统计窗口(stat,1 期拼团订单粒度) ← dwd_trd_order_pay_apd_d 跨层下钻
|
|
|
+-- (1 期 dws 上层无消费方,跨层取数,kb/93 ADR-10)滚动 30d / y{当年} 累计聚合 UNION ALL;
|
|
|
-- EAV 7 字段(kb/33 §1.2);WHERE 源字段 IS NOT NULL / HAVING SUM > 0(金额)或 COUNT(DISTINCT order_id) > 0(次数)过滤空标签
|
|
|
-- 状态:[草案]
|
|
|
-- 备注:sched=T,${dt}=T-1(项目级 globalParam,kb/26);
|
|
|
-- 30d 滚动起点 = DATE_SUB(${dt}, 29),即闭区间 [T-30, T-1] 共 30 个 dt 分区;
|
|
|
-- y{当年} 累计起点 = '${dt}' 前 4 位 + '0101' = 当年 01-01;
|
|
|
-- tag_code 当年通过 SUBSTR('${dt}', 1, 4) 拼接,跨年自然滚动(${dt}=20270101 起,即 2027-01-02 调度批次,自动从 y2026 切到 y2027);
|
|
|
+-- stat 段 WHERE 必带 order_type='group'(1 期拼团粒度,kb/34 §编码规则);
|
|
|
-- 属性细节口径(出生世代切片 / sex 原值 / 等)按 kb/33 §6 默认,业务回头校准换字段不动 schema(EAV 收益);
|
|
|
-- birthday_cert 业务库 STRING 多格式,REPLACE 去 '-' 后取 yyyyMM/yyyy 兼容 'yyyy-MM-dd' 与 'yyyyMMdd';
|
|
|
--- 前置 DS DEPENDENT:dim_usr_user_ful_d.${dt} + dws_usr_user_trade_1d.${dt}
|
|
|
+-- 前置 DS DEPENDENT:dim_usr_user_ful_d.${dt} + dwd_trd_order_pay_apd_d.${dt}
|
|
|
|
|
|
INSERT OVERWRITE TABLE tdm.tdm_usr_tag_d PARTITION (dt='${dt}')
|
|
|
SELECT entity_id, tag_code, tag_value, tag_type, confidence, etl_time FROM (
|
|
|
@@ -59,7 +61,7 @@ SELECT entity_id, tag_code, tag_value, tag_type, confidence, etl_time FROM (
|
|
|
|
|
|
UNION ALL
|
|
|
|
|
|
- -- 4. usr_city 城市(取 cert_city,未实名 NULL 已 filter)
|
|
|
+ -- 4. usr_city 城市(取 cert_city,未实名 NULL 已 filter;真值为区级如"上海市徐汇区")
|
|
|
SELECT
|
|
|
user_id AS entity_id,
|
|
|
'usr_city' AS tag_code,
|
|
|
@@ -126,70 +128,78 @@ SELECT entity_id, tag_code, tag_value, tag_type, confidence, etl_time FROM (
|
|
|
AND LENGTH(REPLACE(birthday_cert, '-', '')) >= 4
|
|
|
AND SUBSTR(REPLACE(birthday_cert, '-', ''), 1, 4) RLIKE '^[12][0-9]{3}$'
|
|
|
|
|
|
- -- ============ 偏好类 stat 16 品类 × 4 窗口 = 64 个 ============
|
|
|
+ -- ============ 消费明细统计 stat 16 品类 × 4 窗口 = 64 个(1 期拼团粒度) ============
|
|
|
|
|
|
UNION ALL
|
|
|
|
|
|
- -- 8. usr_pref_trade_{category}_amt_30d 16 品类 × 近 30 天金额
|
|
|
+ -- 8. usr_trade_{category}_amt_30d 16 品类 × 近 30 天金额
|
|
|
SELECT
|
|
|
user_id AS entity_id,
|
|
|
- CONCAT('usr_pref_trade_', category, '_amt_30d') AS tag_code,
|
|
|
+ CONCAT('usr_trade_', category, '_amt_30d') AS tag_code,
|
|
|
CAST(SUM(pay_amt_cny) AS STRING) AS tag_value,
|
|
|
'stat' AS tag_type,
|
|
|
CAST(1.0 AS DECIMAL(5,4)) AS confidence,
|
|
|
CURRENT_TIMESTAMP() AS etl_time
|
|
|
- FROM dws.dws_usr_user_trade_1d
|
|
|
+ FROM dwd.dwd_trd_order_pay_apd_d
|
|
|
WHERE dt BETWEEN DATE_FORMAT(DATE_SUB(FROM_UNIXTIME(UNIX_TIMESTAMP('${dt}', 'yyyyMMdd')), 29), 'yyyyMMdd')
|
|
|
AND '${dt}'
|
|
|
+ AND order_type = 'group'
|
|
|
+ AND category IS NOT NULL
|
|
|
GROUP BY user_id, category
|
|
|
HAVING SUM(pay_amt_cny) > 0
|
|
|
|
|
|
UNION ALL
|
|
|
|
|
|
- -- 9. usr_pref_trade_{category}_cnt_30d 16 品类 × 近 30 天次数
|
|
|
+ -- 9. usr_trade_{category}_cnt_30d 16 品类 × 近 30 天次数
|
|
|
SELECT
|
|
|
user_id AS entity_id,
|
|
|
- CONCAT('usr_pref_trade_', category, '_cnt_30d') AS tag_code,
|
|
|
- CAST(SUM(pay_order_cnt) AS STRING) AS tag_value,
|
|
|
+ CONCAT('usr_trade_', category, '_cnt_30d') AS tag_code,
|
|
|
+ CAST(COUNT(DISTINCT order_id) AS STRING) AS tag_value,
|
|
|
'stat' AS tag_type,
|
|
|
CAST(1.0 AS DECIMAL(5,4)) AS confidence,
|
|
|
CURRENT_TIMESTAMP() AS etl_time
|
|
|
- FROM dws.dws_usr_user_trade_1d
|
|
|
+ FROM dwd.dwd_trd_order_pay_apd_d
|
|
|
WHERE dt BETWEEN DATE_FORMAT(DATE_SUB(FROM_UNIXTIME(UNIX_TIMESTAMP('${dt}', 'yyyyMMdd')), 29), 'yyyyMMdd')
|
|
|
AND '${dt}'
|
|
|
+ AND order_type = 'group'
|
|
|
+ AND category IS NOT NULL
|
|
|
GROUP BY user_id, category
|
|
|
- HAVING SUM(pay_order_cnt) > 0
|
|
|
+ HAVING COUNT(DISTINCT order_id) > 0
|
|
|
|
|
|
UNION ALL
|
|
|
|
|
|
- -- 10. usr_pref_trade_{category}_amt_y{当年} 16 品类 × 当年累计金额
|
|
|
+ -- 10. usr_trade_{category}_amt_y{当年} 16 品类 × 当年累计金额
|
|
|
SELECT
|
|
|
user_id AS entity_id,
|
|
|
- CONCAT('usr_pref_trade_', category, '_amt_y',
|
|
|
+ CONCAT('usr_trade_', category, '_amt_y',
|
|
|
SUBSTR('${dt}', 1, 4)) AS tag_code,
|
|
|
CAST(SUM(pay_amt_cny) AS STRING) AS tag_value,
|
|
|
'stat' AS tag_type,
|
|
|
CAST(1.0 AS DECIMAL(5,4)) AS confidence,
|
|
|
CURRENT_TIMESTAMP() AS etl_time
|
|
|
- FROM dws.dws_usr_user_trade_1d
|
|
|
+ FROM dwd.dwd_trd_order_pay_apd_d
|
|
|
WHERE dt BETWEEN CONCAT(SUBSTR('${dt}', 1, 4), '0101') AND '${dt}'
|
|
|
+ AND order_type = 'group'
|
|
|
+ AND category IS NOT NULL
|
|
|
GROUP BY user_id, category
|
|
|
HAVING SUM(pay_amt_cny) > 0
|
|
|
|
|
|
UNION ALL
|
|
|
|
|
|
- -- 11. usr_pref_trade_{category}_cnt_y{当年} 16 品类 × 当年累计次数
|
|
|
+ -- 11. usr_trade_{category}_cnt_y{当年} 16 品类 × 当年累计次数
|
|
|
SELECT
|
|
|
user_id AS entity_id,
|
|
|
- CONCAT('usr_pref_trade_', category, '_cnt_y',
|
|
|
+ CONCAT('usr_trade_', category, '_cnt_y',
|
|
|
SUBSTR('${dt}', 1, 4)) AS tag_code,
|
|
|
- CAST(SUM(pay_order_cnt) AS STRING) AS tag_value,
|
|
|
+ CAST(COUNT(DISTINCT order_id) AS STRING) AS tag_value,
|
|
|
'stat' AS tag_type,
|
|
|
CAST(1.0 AS DECIMAL(5,4)) AS confidence,
|
|
|
CURRENT_TIMESTAMP() AS etl_time
|
|
|
- FROM dws.dws_usr_user_trade_1d
|
|
|
+ FROM dwd.dwd_trd_order_pay_apd_d
|
|
|
WHERE dt BETWEEN CONCAT(SUBSTR('${dt}', 1, 4), '0101') AND '${dt}'
|
|
|
+ AND order_type = 'group'
|
|
|
+ AND category IS NOT NULL
|
|
|
GROUP BY user_id, category
|
|
|
- HAVING SUM(pay_order_cnt) > 0
|
|
|
+ HAVING COUNT(DISTINCT order_id) > 0
|
|
|
|
|
|
) t;
|