Quellcode durchsuchen

feat(backfill): 加 7 张 raw 表 create_time 锚点存量 ini

复用 _inc_d 表(不另建 _his_o);column 与各自 inc_d.ini 对齐;
usr 两张保留内嵌 [mask] 段;日期占位由 DS 上调度时填。

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
tianyu.chu vor 5 Tagen
Ursprung
Commit
90d41e870e

+ 30 - 0
manual/backfill/20260501_raw_prd_checklist_base_info_inc_d_history.ini

@@ -0,0 +1,30 @@
+; 作者:tianyu.chu
+; 日期:2026-05-01
+; 工单:(无)
+; 目的:PG public.checklist_base_info → Hive raw.raw_prd_checklist_base_info_inc_d
+;       一次性历史回填,用 -backfill 模式按日切分跑(复用 _inc_d 表,不另建 _his_o)
+; 状态:[待执行]
+;
+; 配套 DDL:manual/ddl/raw/prd/raw_prd_checklist_base_info_inc_d_create.sql
+
+; Source side: reads public.checklist_base_info from database hobby_stocks
+; through the postgresql/prd-poyee-aliyun data source.
+[reader]
+dataSource = postgresql/prd-poyee-aliyun
+database = hobby_stocks
+table = public.checklist_base_info
+; Explicit column list. NOTE(review): presumably mapped positionally onto the
+; [writer] column list, so both lists must keep the same order - confirm.
+column = id,code,year,sport,manufacturer,sets,display_name,type,num,use_num,status,remark,create_time,create_by,update_time,update_by,lot,del_flg,act_point_type,parent_id,sets_version,merchant_id,merchant_name,share,share_to_merchant,custom,panini_config_id,import_failure_cause,check_failure_cause,import_status,review_msg,sub_type,title,data_version,sport_blend,display_name_translations
+; Left empty. NOTE(review): presumably the runner infers types - confirm.
+columnType =
+; Half-open window [start_date, stop_date) on create_time; ${start_date} and
+; ${stop_date} are placeholders substituted before the run.
+; Rows whose create_time is NULL never satisfy this predicate and are skipped.
+where = create_time >= '${start_date}' AND create_time < '${stop_date}'
+; Left empty. NOTE(review): presumably table/column/where above drive the
+; generated query when this is blank - confirm.
+querySql =
+; NOTE(review): key names look like DataX reader options (split key for
+; parallel reads / JDBC fetch size) - confirm against the runner.
+splitPk = id
+fetchSize = 1000
+
+; Sink side: writes ORC files into the dt=${dt} partition directory of
+; raw.raw_prd_checklist_base_info_inc_d on the hdfs/prd-hdfs-ha data source.
+[writer]
+dataSource = hdfs/prd-hdfs-ha
+path = /user/hive/warehouse/raw.db/raw_prd_checklist_base_info_inc_d/dt=${dt}/
+column = id,code,year,sport,manufacturer,sets,display_name,type,num,use_num,status,remark,create_time,create_by,update_time,update_by,lot,del_flg,act_point_type,parent_id,sets_version,merchant_id,merchant_name,share,share_to_merchant,custom,panini_config_id,import_failure_cause,check_failure_cause,import_status,review_msg,sub_type,title,data_version,sport_blend,display_name_translations
+columnType =
+fileType = orc
+fileName = raw_prd_checklist_base_info_inc_d
+encoding = UTF-8
+; NOTE(review): truncate presumably clears existing files under the target
+; dt partition first. This job reuses the _inc_d table, so confirm the
+; backfilled dt values do not overlap partitions the daily job already wrote.
+writeMode = truncate
+; NOTE(review): fileType is orc, so a field delimiter is presumably unused
+; here - confirm.
+fieldDelimiter = \t

+ 30 - 0
manual/backfill/20260501_raw_prd_panini_checklist_base_info_inc_d_history.ini

@@ -0,0 +1,30 @@
+; 作者:tianyu.chu
+; 日期:2026-05-01
+; 工单:(无)
+; 目的:PG public.panini_checklist_base_info → Hive raw.raw_prd_panini_checklist_base_info_inc_d
+;       一次性历史回填,用 -backfill 模式按日切分跑(复用 _inc_d 表,不另建 _his_o)
+; 状态:[待执行]
+;
+; 配套 DDL:manual/ddl/raw/prd/raw_prd_panini_checklist_base_info_inc_d_create.sql
+
+; Source side: reads public.panini_checklist_base_info from database
+; hobby_stocks through the postgresql/prd-poyee-aliyun data source.
+[reader]
+dataSource = postgresql/prd-poyee-aliyun
+database = hobby_stocks
+table = public.panini_checklist_base_info
+; Explicit column list. NOTE(review): presumably mapped positionally onto the
+; [writer] column list, so both lists must keep the same order - confirm.
+column = id,code,year,sport,manufacturer,sets,sets_version,display_name,type,num,use_num,status,remark,lot,del_flg,carmichael_img_type,create_time,create_by,update_time,update_by,import_type,report_flag,base_config,base_price,min_reference_price,max_reference_price,prop1,prop2,prop3,prop4,data_version,first_sport,sets_display_name
+; Left empty. NOTE(review): presumably the runner infers types - confirm.
+columnType =
+; Half-open window [start_date, stop_date) on create_time; ${start_date} and
+; ${stop_date} are placeholders substituted before the run.
+; Rows whose create_time is NULL never satisfy this predicate and are skipped.
+where = create_time >= '${start_date}' AND create_time < '${stop_date}'
+; Left empty. NOTE(review): presumably table/column/where above drive the
+; generated query when this is blank - confirm.
+querySql =
+; NOTE(review): key names look like DataX reader options (split key for
+; parallel reads / JDBC fetch size) - confirm against the runner.
+splitPk = id
+fetchSize = 1000
+
+; Sink side: writes ORC files into the dt=${dt} partition directory of
+; raw.raw_prd_panini_checklist_base_info_inc_d on hdfs/prd-hdfs-ha.
+[writer]
+dataSource = hdfs/prd-hdfs-ha
+path = /user/hive/warehouse/raw.db/raw_prd_panini_checklist_base_info_inc_d/dt=${dt}/
+column = id,code,year,sport,manufacturer,sets,sets_version,display_name,type,num,use_num,status,remark,lot,del_flg,carmichael_img_type,create_time,create_by,update_time,update_by,import_type,report_flag,base_config,base_price,min_reference_price,max_reference_price,prop1,prop2,prop3,prop4,data_version,first_sport,sets_display_name
+columnType =
+fileType = orc
+fileName = raw_prd_panini_checklist_base_info_inc_d
+encoding = UTF-8
+; NOTE(review): truncate presumably clears existing files under the target
+; dt partition first. This job reuses the _inc_d table, so confirm the
+; backfilled dt values do not overlap partitions the daily job already wrote.
+writeMode = truncate
+; NOTE(review): fileType is orc, so a field delimiter is presumably unused
+; here - confirm.
+fieldDelimiter = \t

+ 30 - 0
manual/backfill/20260501_raw_prd_panini_checklist_version_config_inc_d_history.ini

@@ -0,0 +1,30 @@
+; 作者:tianyu.chu
+; 日期:2026-05-01
+; 工单:(无)
+; 目的:PG public.panini_checklist_version_config → Hive raw.raw_prd_panini_checklist_version_config_inc_d
+;       一次性历史回填,用 -backfill 模式按日切分跑(复用 _inc_d 表,不另建 _his_o)
+; 状态:[待执行]
+;
+; 配套 DDL:manual/ddl/raw/prd/raw_prd_panini_checklist_version_config_inc_d_create.sql
+
+; Source side: reads public.panini_checklist_version_config from database
+; hobby_stocks through the postgresql/prd-poyee-aliyun data source.
+[reader]
+dataSource = postgresql/prd-poyee-aliyun
+database = hobby_stocks
+table = public.panini_checklist_version_config
+; Explicit column list. NOTE(review): presumably mapped positionally onto the
+; [writer] column list, so both lists must keep the same order - confirm.
+column = id,panini_list_id,set_version,carmichael_img_type,import_type,report_flag,base_config,base_price,min_reference_price,max_reference_price,num,use_num,status,remark,del_flg,create_time,create_by,update_time,update_by,prop1,prop2,prop3,prop4,tag,display_name,presale_time,max_box,league,issuing_time,issuing_price,display_name_translations,sale_time,open_time
+; Left empty. NOTE(review): presumably the runner infers types - confirm.
+columnType =
+; Half-open window [start_date, stop_date) on create_time; ${start_date} and
+; ${stop_date} are placeholders substituted before the run.
+; Rows whose create_time is NULL never satisfy this predicate and are skipped.
+where = create_time >= '${start_date}' AND create_time < '${stop_date}'
+; Left empty. NOTE(review): presumably table/column/where above drive the
+; generated query when this is blank - confirm.
+querySql =
+; NOTE(review): key names look like DataX reader options (split key for
+; parallel reads / JDBC fetch size) - confirm against the runner.
+splitPk = id
+fetchSize = 1000
+
+; Sink side: writes ORC files into the dt=${dt} partition directory of
+; raw.raw_prd_panini_checklist_version_config_inc_d on hdfs/prd-hdfs-ha.
+[writer]
+dataSource = hdfs/prd-hdfs-ha
+path = /user/hive/warehouse/raw.db/raw_prd_panini_checklist_version_config_inc_d/dt=${dt}/
+column = id,panini_list_id,set_version,carmichael_img_type,import_type,report_flag,base_config,base_price,min_reference_price,max_reference_price,num,use_num,status,remark,del_flg,create_time,create_by,update_time,update_by,prop1,prop2,prop3,prop4,tag,display_name,presale_time,max_box,league,issuing_time,issuing_price,display_name_translations,sale_time,open_time
+columnType =
+fileType = orc
+fileName = raw_prd_panini_checklist_version_config_inc_d
+encoding = UTF-8
+; NOTE(review): truncate presumably clears existing files under the target
+; dt partition first. This job reuses the _inc_d table, so confirm the
+; backfilled dt values do not overlap partitions the daily job already wrote.
+writeMode = truncate
+; NOTE(review): fileType is orc, so a field delimiter is presumably unused
+; here - confirm.
+fieldDelimiter = \t

+ 30 - 0
manual/backfill/20260501_raw_shp_tzy_merchant_info_inc_d_history.ini

@@ -0,0 +1,30 @@
+; 作者:tianyu.chu
+; 日期:2026-05-01
+; 工单:(无)
+; 目的:PG public.tzy_merchant_info → Hive raw.raw_shp_tzy_merchant_info_inc_d
+;       一次性历史回填,用 -backfill 模式按日切分跑(复用 _inc_d 表,不另建 _his_o)
+; 状态:[待执行]
+;
+; 配套 DDL:manual/ddl/raw/shp/raw_shp_tzy_merchant_info_inc_d_create.sql
+
+; Source side: reads public.tzy_merchant_info from database hobby_stocks
+; through the postgresql/prd-poyee-aliyun data source.
+[reader]
+dataSource = postgresql/prd-poyee-aliyun
+database = hobby_stocks
+table = public.tzy_merchant_info
+; Explicit column list. NOTE(review): presumably mapped positionally onto the
+; [writer] column list, so both lists must keep the same order - confirm.
+column = id,appid,user_id,username,name,status,remark,create_by,create_time,update_by,update_time,code,fans,sale_num,applet_auth,applet_lives_auth,applet_lives_role,commission_rate,prop_json,sort_rate,check_status,live_type,living_auth_config,goods_sold_num,hot_config,tag_config,mall_role,living_time,express_level,del_flg,group_show_name,main_business,min_card_num,dy_name,current_month_score,member_level,member_name,member_medal,prefer_valid_time,tag_id,show_status,point_type,refund_limit_day,open_act_discount,reputation_score,hide_stock,version,total_sold_num,shipping_cost_config,merchant_group_id
+; Left empty. NOTE(review): presumably the runner infers types - confirm.
+columnType =
+; Half-open window [start_date, stop_date) on create_time; ${start_date} and
+; ${stop_date} are placeholders substituted before the run.
+; Rows whose create_time is NULL never satisfy this predicate and are skipped.
+where = create_time >= '${start_date}' AND create_time < '${stop_date}'
+; Left empty. NOTE(review): presumably table/column/where above drive the
+; generated query when this is blank - confirm.
+querySql =
+; NOTE(review): key names look like DataX reader options (split key for
+; parallel reads / JDBC fetch size) - confirm against the runner.
+splitPk = id
+fetchSize = 1000
+
+; Sink side: writes ORC files into the dt=${dt} partition directory of
+; raw.raw_shp_tzy_merchant_info_inc_d on the hdfs/prd-hdfs-ha data source.
+[writer]
+dataSource = hdfs/prd-hdfs-ha
+path = /user/hive/warehouse/raw.db/raw_shp_tzy_merchant_info_inc_d/dt=${dt}/
+column = id,appid,user_id,username,name,status,remark,create_by,create_time,update_by,update_time,code,fans,sale_num,applet_auth,applet_lives_auth,applet_lives_role,commission_rate,prop_json,sort_rate,check_status,live_type,living_auth_config,goods_sold_num,hot_config,tag_config,mall_role,living_time,express_level,del_flg,group_show_name,main_business,min_card_num,dy_name,current_month_score,member_level,member_name,member_medal,prefer_valid_time,tag_id,show_status,point_type,refund_limit_day,open_act_discount,reputation_score,hide_stock,version,total_sold_num,shipping_cost_config,merchant_group_id
+columnType =
+fileType = orc
+fileName = raw_shp_tzy_merchant_info_inc_d
+encoding = UTF-8
+; NOTE(review): truncate presumably clears existing files under the target
+; dt partition first. This job reuses the _inc_d table, so confirm the
+; backfilled dt values do not overlap partitions the daily job already wrote.
+writeMode = truncate
+; NOTE(review): fileType is orc, so a field delimiter is presumably unused
+; here - confirm.
+fieldDelimiter = \t

+ 30 - 0
manual/backfill/20260501_raw_trd_card_group_info_inc_d_history.ini

@@ -0,0 +1,30 @@
+; 作者:tianyu.chu
+; 日期:2026-05-01
+; 工单:(无)
+; 目的:PG public.card_group_info → Hive raw.raw_trd_card_group_info_inc_d
+;       一次性历史回填,用 -backfill 模式按日切分跑(复用 _inc_d 表,不另建 _his_o)
+; 状态:[待执行]
+;
+; 配套 DDL:manual/ddl/raw/trd/raw_trd_card_group_info_inc_d_create.sql
+
+; Source side: reads public.card_group_info from database hobby_stocks
+; through the postgresql/prd-poyee-aliyun data source.
+[reader]
+dataSource = postgresql/prd-poyee-aliyun
+database = hobby_stocks
+table = public.card_group_info
+; Explicit column list. NOTE(review): presumably mapped positionally onto the
+; [writer] column list, so both lists must keep the same order - confirm.
+column = id,merchant_id,appid,name,code,status,specs,type,random_type,total_price,copies,unit_price,sold_copies,release_time,cycle,show_applet,title,msg,remark,create_time,update_by,update_time,order_quota_min,order_quota_max,user_quota_max,start_time,marketing_info,reviewmsg,lock,commission_rate,year,sport,manufacturer,sets,act,config,info_config,total_num,banner_end_time,add_banner,finished_time,display_name,group_sets_no,close_payment_time,confirm_send_time,close_payment_status,open_card,close_payment_record,group_full_time,live_create_time,live_start_time,live_end_time,report_start_time,report_end_time,report_review_num,report_review_first_time,report_review_end_time,review_hold_time,review_approval_time,review_num,config_json,free_flag,group_info_search_index_col,mer_name,change_type,act_price,act_config_json,real_sold_num,weight,hot_type,team_first,prop1,prop2,prop3,point_rate,point_max,point_min,list_id,list_code,mix_copies,sub_type,act_point_type,payment_method,payment_total_price,payment_commission,payment_finished_price,payment_remain_price,payment_online_price,exclusive,has_bg,merchant_sort,del_flg,del_time,review_account,act_id,sold_end_time,panini_list_id,hot_type_config,goods_type,report_flag,use_coupon,user_level,custom,gift_card_id,group_show_name,min_card_num,act_type,waring_type,compensation_status,point_type,first_act_config,gift_config,version,extra_prop,use_member_discount,merchant_open
+; Left empty. NOTE(review): presumably the runner infers types - confirm.
+columnType =
+; Half-open window [start_date, stop_date) on create_time; ${start_date} and
+; ${stop_date} are placeholders substituted before the run.
+; Rows whose create_time is NULL never satisfy this predicate and are skipped.
+where = create_time >= '${start_date}' AND create_time < '${stop_date}'
+; Left empty. NOTE(review): presumably table/column/where above drive the
+; generated query when this is blank - confirm.
+querySql =
+; NOTE(review): key names look like DataX reader options (split key for
+; parallel reads / JDBC fetch size) - confirm against the runner.
+splitPk = id
+fetchSize = 1000
+
+; Sink side: writes ORC files into the dt=${dt} partition directory of
+; raw.raw_trd_card_group_info_inc_d on the hdfs/prd-hdfs-ha data source.
+[writer]
+dataSource = hdfs/prd-hdfs-ha
+path = /user/hive/warehouse/raw.db/raw_trd_card_group_info_inc_d/dt=${dt}/
+column = id,merchant_id,appid,name,code,status,specs,type,random_type,total_price,copies,unit_price,sold_copies,release_time,cycle,show_applet,title,msg,remark,create_time,update_by,update_time,order_quota_min,order_quota_max,user_quota_max,start_time,marketing_info,reviewmsg,lock,commission_rate,year,sport,manufacturer,sets,act,config,info_config,total_num,banner_end_time,add_banner,finished_time,display_name,group_sets_no,close_payment_time,confirm_send_time,close_payment_status,open_card,close_payment_record,group_full_time,live_create_time,live_start_time,live_end_time,report_start_time,report_end_time,report_review_num,report_review_first_time,report_review_end_time,review_hold_time,review_approval_time,review_num,config_json,free_flag,group_info_search_index_col,mer_name,change_type,act_price,act_config_json,real_sold_num,weight,hot_type,team_first,prop1,prop2,prop3,point_rate,point_max,point_min,list_id,list_code,mix_copies,sub_type,act_point_type,payment_method,payment_total_price,payment_commission,payment_finished_price,payment_remain_price,payment_online_price,exclusive,has_bg,merchant_sort,del_flg,del_time,review_account,act_id,sold_end_time,panini_list_id,hot_type_config,goods_type,report_flag,use_coupon,user_level,custom,gift_card_id,group_show_name,min_card_num,act_type,waring_type,compensation_status,point_type,first_act_config,gift_config,version,extra_prop,use_member_discount,merchant_open
+columnType =
+fileType = orc
+fileName = raw_trd_card_group_info_inc_d
+encoding = UTF-8
+; NOTE(review): truncate presumably clears existing files under the target
+; dt partition first. This job reuses the _inc_d table, so confirm the
+; backfilled dt values do not overlap partitions the daily job already wrote.
+writeMode = truncate
+; NOTE(review): fileType is orc, so a field delimiter is presumably unused
+; here - confirm.
+fieldDelimiter = \t

+ 33 - 0
manual/backfill/20260501_raw_usr_app_base_user_inc_d_history.ini

@@ -0,0 +1,33 @@
+; 作者:tianyu.chu
+; 日期:2026-05-01
+; 工单:(无)
+; 目的:PG public.app_base_user → Hive raw.raw_usr_app_base_user_inc_d
+;       一次性历史回填,用 -backfill 模式按日切分跑(复用 _inc_d 表,不另建 _his_o)
+; 状态:[待执行]
+;
+; 配套 DDL:manual/ddl/raw/usr/raw_usr_app_base_user_inc_d_create.sql
+
+; Source side: reads public.app_base_user from database hobby_stocks
+; through the postgresql/prd-poyee-aliyun data source.
+[reader]
+dataSource = postgresql/prd-poyee-aliyun
+database = hobby_stocks
+table = public.app_base_user
+; Explicit column list. NOTE(review): presumably mapped positionally onto the
+; [writer] column list, so both lists must keep the same order - confirm.
+column = id,appid,point,level,register_channel,status,del_flg,remark,create_by,create_time,update_by,update_time,username,growth_num,code,notify_flag,user_id,notify_type,face_verify,open_psd,refuse_pick_up,prop1,prop2,prop3,prop4,window_open,open_invoice,blacklist,id_card,member_level,member_name,current_month_growth,member_init_flag,member_keep_growth,register_ip_addr,register_addr,login_ip_addr,login_addr,notify_top_show,voice_reminder,vibrate_reminder,consume_amount,order_total_num,open_card_show,effects_type,live_config_json,cancel_verify_num,version,daily_limit,weekly_limit,monthly_limit,live_anonymous
+; Left empty. NOTE(review): presumably the runner infers types - confirm.
+columnType =
+; Half-open window [start_date, stop_date) on create_time; ${start_date} and
+; ${stop_date} are placeholders substituted before the run.
+; Rows whose create_time is NULL never satisfy this predicate and are skipped.
+where = create_time >= '${start_date}' AND create_time < '${stop_date}'
+; Left empty. NOTE(review): presumably table/column/where above drive the
+; generated query when this is blank - confirm.
+querySql =
+; NOTE(review): key names look like DataX reader options (split key for
+; parallel reads / JDBC fetch size) - confirm against the runner.
+splitPk = id
+fetchSize = 1000
+
+; Per-column masking rules, keyed by column name.
+; NOTE(review): md5 presumably replaces id_card with its MD5 digest before
+; the row is written; exact semantics are defined by the runner - confirm.
+[mask]
+id_card = md5
+
+; Sink side: writes ORC files into the dt=${dt} partition directory of
+; raw.raw_usr_app_base_user_inc_d on the hdfs/prd-hdfs-ha data source.
+[writer]
+dataSource = hdfs/prd-hdfs-ha
+path = /user/hive/warehouse/raw.db/raw_usr_app_base_user_inc_d/dt=${dt}/
+column = id,appid,point,level,register_channel,status,del_flg,remark,create_by,create_time,update_by,update_time,username,growth_num,code,notify_flag,user_id,notify_type,face_verify,open_psd,refuse_pick_up,prop1,prop2,prop3,prop4,window_open,open_invoice,blacklist,id_card,member_level,member_name,current_month_growth,member_init_flag,member_keep_growth,register_ip_addr,register_addr,login_ip_addr,login_addr,notify_top_show,voice_reminder,vibrate_reminder,consume_amount,order_total_num,open_card_show,effects_type,live_config_json,cancel_verify_num,version,daily_limit,weekly_limit,monthly_limit,live_anonymous
+columnType =
+fileType = orc
+fileName = raw_usr_app_base_user_inc_d
+encoding = UTF-8
+; NOTE(review): truncate presumably clears existing files under the target
+; dt partition first. This job reuses the _inc_d table, so confirm the
+; backfilled dt values do not overlap partitions the daily job already wrote.
+writeMode = truncate
+; NOTE(review): fileType is orc, so a field delimiter is presumably unused
+; here - confirm.
+fieldDelimiter = \t

+ 33 - 0
manual/backfill/20260501_raw_usr_app_user_cert_info_inc_d_history.ini

@@ -0,0 +1,33 @@
+; 作者:tianyu.chu
+; 日期:2026-05-01
+; 工单:(无)
+; 目的:PG public.app_user_cert_info → Hive raw.raw_usr_app_user_cert_info_inc_d
+;       一次性历史回填,用 -backfill 模式按日切分跑(复用 _inc_d 表,不另建 _his_o)
+; 状态:[待执行]
+;
+; 配套 DDL:manual/ddl/raw/usr/raw_usr_app_user_cert_info_inc_d_create.sql
+
+; Source side: reads public.app_user_cert_info from database hobby_stocks
+; through the postgresql/prd-poyee-aliyun data source.
+[reader]
+dataSource = postgresql/prd-poyee-aliyun
+database = hobby_stocks
+table = public.app_user_cert_info
+; Explicit column list. NOTE(review): presumably mapped positionally onto the
+; [writer] column list, so both lists must keep the same order - confirm.
+; NOTE(review): this table lists del_flag, whereas the other six backfill
+; configs in this commit use del_flg - confirm against the PG schema.
+column = id,user_id,cert_birthday,cert_sex,cert_province,cert_city,version,status,del_flag,create_time,update_time
+; Left empty. NOTE(review): presumably the runner infers types - confirm.
+columnType =
+; Half-open window [start_date, stop_date) on create_time; ${start_date} and
+; ${stop_date} are placeholders substituted before the run.
+; Rows whose create_time is NULL never satisfy this predicate and are skipped.
+where = create_time >= '${start_date}' AND create_time < '${stop_date}'
+; Left empty. NOTE(review): presumably table/column/where above drive the
+; generated query when this is blank - confirm.
+querySql =
+; NOTE(review): key names look like DataX reader options (split key for
+; parallel reads / JDBC fetch size) - confirm against the runner.
+splitPk = id
+fetchSize = 1000
+
+; Per-column masking rules, keyed by column name.
+; NOTE(review): month_trunc presumably truncates cert_birthday to the first
+; day of its month; exact semantics are defined by the runner - confirm.
+[mask]
+cert_birthday = month_trunc
+
+; Sink side: writes ORC files into the dt=${dt} partition directory of
+; raw.raw_usr_app_user_cert_info_inc_d on the hdfs/prd-hdfs-ha data source.
+[writer]
+dataSource = hdfs/prd-hdfs-ha
+path = /user/hive/warehouse/raw.db/raw_usr_app_user_cert_info_inc_d/dt=${dt}/
+column = id,user_id,cert_birthday,cert_sex,cert_province,cert_city,version,status,del_flag,create_time,update_time
+columnType =
+fileType = orc
+fileName = raw_usr_app_user_cert_info_inc_d
+encoding = UTF-8
+; NOTE(review): truncate presumably clears existing files under the target
+; dt partition first. This job reuses the _inc_d table, so confirm the
+; backfilled dt values do not overlap partitions the daily job already wrote.
+writeMode = truncate
+; NOTE(review): fileType is orc, so a field delimiter is presumably unused
+; here - confirm.
+fieldDelimiter = \t