| 1234567891011121314151617181920212223242526272829303132 |
- ; DataX 同步配置:PG public.app_user_cert_info → Hive test.raw_usr_app_user_cert_info_inc_d
- ;
- ; 源 datasource:datasource/postgresql/dev-poyee-xiaoxu.ini
- ; 目标 datasource:datasource/hdfs/prd-hdfs-ha.ini(HA 集群,nameservice1)
- ; 时间锚点:create_time
- ; 敏感字段:
- ; - user_cert_data / cert_no / cert_real_name:column/querySql 都不查
- ; - cert_birthday:querySql 里 TO_CHAR 截到月粒度,PG 端执行完成脱敏(敏感原值不出业务库)
- ;
- ; 注:key = value 不对齐——parse_ddl 的 grep "path =" 要求字面子串,多空格对齐会破匹配
- ; Reader side: extracts rows from the PostgreSQL table public.app_user_cert_info.
- ; The file header names the backing datasource file as
- ; datasource/postgresql/dev-poyee-xiaoxu.ini.
- [reader]
- dataSource = postgresql/dev-poyee-xiaoxu
- database = hs_sync_data
- table = public.app_user_cert_info
- ; Exported columns. Per the file header, the sensitive fields user_cert_data,
- ; cert_no and cert_real_name are deliberately left out of both this list and
- ; the query below.
- column = id,user_id,cert_birthday,cert_sex,cert_province,cert_city,version,status,del_flag,create_time,update_time
- ; Left empty in this file, as is the where key; the row filter lives in the
- ; query below instead.
- columnType =
- where =
- ; Incremental extract anchored on create_time with a half-open window:
- ; start_date inclusive, stop_date exclusive. TO_CHAR cuts cert_birthday down to
- ; year-month inside PostgreSQL, so the full birth date never leaves the source DB.
- ; The start_date / stop_date placeholders are presumably substituted by the job
- ; launcher before submission - TODO confirm.
- ; NOTE(review): stock DataX readers ignore table/column/where once a custom query
- ; is configured, and the split key below probably has no effect either - confirm
- ; against the DataX version or wrapper in use.
- querySql = SELECT id, user_id, TO_CHAR(cert_birthday, 'YYYY-MM') AS cert_birthday, cert_sex, cert_province, cert_city, version, status, del_flag, create_time, update_time FROM public.app_user_cert_info WHERE create_time >= '${start_date}' AND create_time < '${stop_date}'
- splitPk = id
- ; Presumably the JDBC fetch batch size in rows - TODO confirm.
- fetchSize = 1000
- ; Writer side: writes ORC files into the dt partition directory of the Hive
- ; table test.raw_usr_app_user_cert_info_inc_d. The file header describes the
- ; target datasource as the HA HDFS cluster (nameservice1).
- [writer]
- dataSource = hdfs/prd-hdfs-ha
- ; Partition directory; the dt placeholder is presumably filled in per run - TODO confirm.
- ; Keep exactly one space on each side of the equals sign on the next line: per
- ; the file header, parse_ddl greps for the literal key-plus-equals substring and
- ; column-aligned padding breaks that match.
- path = /user/hive/warehouse/test.db/raw_usr_app_user_cert_info_inc_d/dt=${dt}/
- ; Same names in the same order as the reader's column list.
- column = id,user_id,cert_birthday,cert_sex,cert_province,cert_city,version,status,del_flag,create_time,update_time
- ; Left empty in this file.
- columnType =
- fileType = orc
- fileName = raw_usr_app_user_cert_info_inc_d
- encoding = UTF-8
- ; NOTE(review): truncate presumably clears existing files under the target
- ; directory before writing, which would make a rerun for the same dt replace
- ; rather than append - confirm against the hdfswriter version in use.
- writeMode = truncate
- ; NOTE(review): whether the value below is read as a tab escape or as a literal
- ; backslash-t depends on the consuming parser - confirm. The output format here
- ; is orc, so the delimiter presumably does not shape the stored data.
- fieldDelimiter = \t
|