|
@@ -0,0 +1,195 @@
|
|
|
|
|
-- 作者:tianyu.chu
-- 日期:2026-05-11
-- 工单:(无)
-- 目的:tdm_usr_tag_d 日常调度 + 手动 init 复用(kb/33 §2):
--       INSERT OVERWRITE PARTITION (dt='${dt}') 静态单分区全量重刷;
--       7 属性(attr) ← dim_usr_user_ful_d.dt='${dt}' 各属性 UNION ALL;
--       4 偏好窗口(stat) ← dws_usr_user_trade_1d 滚动 30d / y{当年} 累计聚合 UNION ALL;
--       EAV 7 字段(kb/33 §1.2);WHERE 源字段 IS NOT NULL / HAVING SUM > 0 过滤空标签
-- 状态:[草案]
-- 备注:sched=T,${dt}=T-1(项目级 globalParam,kb/26);
--       30d 滚动起点 = DATE_SUB(${dt}, 29),窗口为 [T-30, T-1] 共 30 天;
--       y{当年} 累计起点 = '${dt}' 前 4 位 + '0101' = 当年 01-01;
--       tag_code 年份通过 SUBSTR('${dt}', 1, 4) 拼接,跨年自然滚动(dt=20270101,即 2027-01-02 的调度起,自动从 y2026 切到 y2027);
--       属性细节口径(出生世代切片 / sex 原值 / 等)按 kb/33 §6 默认,业务回头校准换字段不动 schema(EAV 收益);
--       birthday_cert 业务库 STRING 多格式,REPLACE 去 '-' 后取 yyyyMM/yyyy 兼容 'yyyy-MM-dd' 与 'yyyyMMdd';
--       前置 DS DEPENDENT:dim_usr_user_ful_d.${dt} + dws_usr_user_trade_1d.${dt}
|
|
|
|
|
+
|
|
|
|
|
-- Rebuilds the single static partition dt='${dt}' of tdm.tdm_usr_tag_d.
-- Every UNION ALL branch emits the same six columns
--   (entity_id, tag_code, tag_value, tag_type, confidence, etl_time):
--   branches 1-7  : 'attr' tags read from dim.dim_usr_user_ful_d where dt = '${dt}';
--   branches 8-11 : 'stat' tags aggregated from dws.dws_usr_user_trade_1d per (user_id, category).
-- Rows whose source value is NULL, or whose aggregate is not > 0, are not written.
-- '${dt}' is treated as a yyyyMMdd string throughout (see the 'yyyyMMdd' pattern and the
-- '0101' suffix used in the stat branches).
-- Every tag is written with a constant confidence of 1.0 and the statement's CURRENT_TIMESTAMP().
INSERT OVERWRITE TABLE tdm.tdm_usr_tag_d PARTITION (dt='${dt}')
SELECT entity_id, tag_code, tag_value, tag_type, confidence, etl_time FROM (

    -- ============ Attribute tags ('attr'), 7 branches ============

    -- 1. usr_level: member level, stored as a string.
    SELECT
        user_id                                  AS entity_id,
        'usr_level'                              AS tag_code,
        CAST(member_level AS STRING)             AS tag_value,
        'attr'                                   AS tag_type,
        CAST(1.0 AS DECIMAL(5,4))                AS confidence,
        CURRENT_TIMESTAMP()                      AS etl_time
    FROM dim.dim_usr_user_ful_d
    WHERE dt = '${dt}'
      AND member_level IS NOT NULL

    UNION ALL

    -- 2. usr_is_cert: real-name certification flag, stored as a string.
    SELECT
        user_id                                  AS entity_id,
        'usr_is_cert'                            AS tag_code,
        CAST(is_cert AS STRING)                  AS tag_value,
        'attr'                                   AS tag_type,
        CAST(1.0 AS DECIMAL(5,4))                AS confidence,
        CURRENT_TIMESTAMP()                      AS etl_time
    FROM dim.dim_usr_user_ful_d
    WHERE dt = '${dt}'
      AND is_cert IS NOT NULL

    UNION ALL

    -- 3. usr_sex: sex_cert is written as-is (no value mapping is applied here).
    SELECT
        user_id                                  AS entity_id,
        'usr_sex'                                AS tag_code,
        CAST(sex_cert AS STRING)                 AS tag_value,
        'attr'                                   AS tag_type,
        CAST(1.0 AS DECIMAL(5,4))                AS confidence,
        CURRENT_TIMESTAMP()                      AS etl_time
    FROM dim.dim_usr_user_ful_d
    WHERE dt = '${dt}'
      AND sex_cert IS NOT NULL

    UNION ALL

    -- 4. usr_city: taken from cert_city; rows with a NULL cert_city are skipped.
    SELECT
        user_id                                  AS entity_id,
        'usr_city'                               AS tag_code,
        cert_city                                AS tag_value,
        'attr'                                   AS tag_type,
        CAST(1.0 AS DECIMAL(5,4))                AS confidence,
        CURRENT_TIMESTAMP()                      AS etl_time
    FROM dim.dim_usr_user_ful_d
    WHERE dt = '${dt}'
      AND cert_city IS NOT NULL

    UNION ALL

    -- 5. usr_register_time: registration time formatted as yyyyMMdd.
    -- NOTE(review): if reg_create_time is a STRING that DATE_FORMAT cannot parse, tag_value
    -- would be NULL despite the IS NOT NULL filter -- confirm the column type.
    SELECT
        user_id                                  AS entity_id,
        'usr_register_time'                      AS tag_code,
        DATE_FORMAT(reg_create_time, 'yyyyMMdd') AS tag_value,
        'attr'                                   AS tag_type,
        CAST(1.0 AS DECIMAL(5,4))                AS confidence,
        CURRENT_TIMESTAMP()                      AS etl_time
    FROM dim.dim_usr_user_ful_d
    WHERE dt = '${dt}'
      AND reg_create_time IS NOT NULL

    UNION ALL

    -- 6. usr_birth_month: birth year-month as yyyyMM.
    -- '-' is stripped so that both 'yyyy-MM-dd' and 'yyyyMMdd' yield the same 6-char prefix.
    -- The whole prefix is validated (4-digit year starting with 1 or 2, month 01-12), so
    -- other separators, letters or an impossible month never reach tag_value. The anchored
    -- pattern also implies the stripped value has at least 6 characters.
    -- NOTE(review): non-zero-padded dates such as 'yyyy-M-d' cannot be told apart after the
    -- '-' is removed; confirm the source never uses them.
    SELECT
        user_id                                  AS entity_id,
        'usr_birth_month'                        AS tag_code,
        SUBSTR(REPLACE(birthday_cert, '-', ''), 1, 6) AS tag_value,
        'attr'                                   AS tag_type,
        CAST(1.0 AS DECIMAL(5,4))                AS confidence,
        CURRENT_TIMESTAMP()                      AS etl_time
    FROM dim.dim_usr_user_ful_d
    WHERE dt = '${dt}'
      AND birthday_cert IS NOT NULL
      AND SUBSTR(REPLACE(birthday_cert, '-', ''), 1, 6) RLIKE '^[12][0-9]{3}(0[1-9]|1[0-2])$'

    UNION ALL

    -- 7. usr_generation: birth-year bucket label.
    -- Buckets are 10 years wide up to 1979 and 5 years wide from 1980 to 2009;
    -- anything earlier than 1960 is '60前' and anything from 2010 onward is '10后'.
    -- The birth year is parsed once in the derived table and reused by every WHEN.
    -- NOTE(review): the year pattern accepts 1000-2999, so implausible years still get a
    -- label ('60前' or '10后'); tighten the pattern if that is not wanted.
    SELECT
        user_id                                  AS entity_id,
        'usr_generation'                         AS tag_code,
        CASE
            WHEN birth_year < 1960 THEN '60前'
            WHEN birth_year < 1970 THEN '60后'
            WHEN birth_year < 1980 THEN '70后'
            WHEN birth_year < 1985 THEN '80后'
            WHEN birth_year < 1990 THEN '85后'
            WHEN birth_year < 1995 THEN '90后'
            WHEN birth_year < 2000 THEN '95后'
            WHEN birth_year < 2005 THEN '00后'
            WHEN birth_year < 2010 THEN '05后'
            ELSE                        '10后'
        END                                      AS tag_value,
        'attr'                                   AS tag_type,
        CAST(1.0 AS DECIMAL(5,4))                AS confidence,
        CURRENT_TIMESTAMP()                      AS etl_time
    FROM (
        SELECT
            user_id,
            CAST(SUBSTR(REPLACE(birthday_cert, '-', ''), 1, 4) AS INT) AS birth_year
        FROM dim.dim_usr_user_ful_d
        WHERE dt = '${dt}'
          AND birthday_cert IS NOT NULL
          AND LENGTH(REPLACE(birthday_cert, '-', '')) >= 4
          AND SUBSTR(REPLACE(birthday_cert, '-', ''), 1, 4) RLIKE '^[12][0-9]{3}$'
    ) birth

    UNION ALL

    -- ============ Preference tags ('stat'): one tag per category per window, 4 windows ============
    -- In all four branches a NULL category is excluded, because CONCAT with a NULL argument
    -- would produce a NULL tag_code.

    -- 8. usr_pref_trade_{category}_amt_30d: paid amount over the trailing 30 days.
    -- The window is the 30 daily partitions ending at '${dt}' (start = '${dt}' minus 29 days).
    SELECT
        user_id                                  AS entity_id,
        CONCAT('usr_pref_trade_', category, '_amt_30d') AS tag_code,
        CAST(SUM(pay_amt_cny) AS STRING)         AS tag_value,
        'stat'                                   AS tag_type,
        CAST(1.0 AS DECIMAL(5,4))                AS confidence,
        CURRENT_TIMESTAMP()                      AS etl_time
    FROM dws.dws_usr_user_trade_1d
    WHERE dt BETWEEN DATE_FORMAT(DATE_SUB(FROM_UNIXTIME(UNIX_TIMESTAMP('${dt}', 'yyyyMMdd')), 29), 'yyyyMMdd')
                 AND '${dt}'
      AND category IS NOT NULL
    GROUP BY user_id, category
    HAVING SUM(pay_amt_cny) > 0

    UNION ALL

    -- 9. usr_pref_trade_{category}_cnt_30d: paid order count over the same trailing 30 days.
    SELECT
        user_id                                  AS entity_id,
        CONCAT('usr_pref_trade_', category, '_cnt_30d') AS tag_code,
        CAST(SUM(pay_order_cnt) AS STRING)       AS tag_value,
        'stat'                                   AS tag_type,
        CAST(1.0 AS DECIMAL(5,4))                AS confidence,
        CURRENT_TIMESTAMP()                      AS etl_time
    FROM dws.dws_usr_user_trade_1d
    WHERE dt BETWEEN DATE_FORMAT(DATE_SUB(FROM_UNIXTIME(UNIX_TIMESTAMP('${dt}', 'yyyyMMdd')), 29), 'yyyyMMdd')
                 AND '${dt}'
      AND category IS NOT NULL
    GROUP BY user_id, category
    HAVING SUM(pay_order_cnt) > 0

    UNION ALL

    -- 10. usr_pref_trade_{category}_amt_y{yyyy}: paid amount from Jan 1 of '${dt}''s year
    -- through '${dt}'. The year suffix comes from the first four characters of '${dt}',
    -- so the tag_code changes by itself once '${dt}' moves into a new year.
    SELECT
        user_id                                  AS entity_id,
        CONCAT('usr_pref_trade_', category, '_amt_y',
               SUBSTR('${dt}', 1, 4))            AS tag_code,
        CAST(SUM(pay_amt_cny) AS STRING)         AS tag_value,
        'stat'                                   AS tag_type,
        CAST(1.0 AS DECIMAL(5,4))                AS confidence,
        CURRENT_TIMESTAMP()                      AS etl_time
    FROM dws.dws_usr_user_trade_1d
    WHERE dt BETWEEN CONCAT(SUBSTR('${dt}', 1, 4), '0101') AND '${dt}'
      AND category IS NOT NULL
    GROUP BY user_id, category
    HAVING SUM(pay_amt_cny) > 0

    UNION ALL

    -- 11. usr_pref_trade_{category}_cnt_y{yyyy}: paid order count over the same
    -- year-to-date window as branch 10.
    SELECT
        user_id                                  AS entity_id,
        CONCAT('usr_pref_trade_', category, '_cnt_y',
               SUBSTR('${dt}', 1, 4))            AS tag_code,
        CAST(SUM(pay_order_cnt) AS STRING)       AS tag_value,
        'stat'                                   AS tag_type,
        CAST(1.0 AS DECIMAL(5,4))                AS confidence,
        CURRENT_TIMESTAMP()                      AS etl_time
    FROM dws.dws_usr_user_trade_1d
    WHERE dt BETWEEN CONCAT(SUBSTR('${dt}', 1, 4), '0101') AND '${dt}'
      AND category IS NOT NULL
    GROUP BY user_id, category
    HAVING SUM(pay_order_cnt) > 0

) t;
|