Procházet zdrojové kódy

test(integration): 加大表压测 card_group_order 91 字段裁剪版入库

- tests/integration/datax/import/stress/ 新目录(并列 hive_import/hdfs_export)
- ini: prd-poyee-aliyun 数据源 + 91 字段(砍 28 敏感/冗余字段:
  支付/退款/收件人/物流/快递/流水号/tenant_id/version 等)
- hive_raw.sql: 配套建表 DDL,含 DROP IF EXISTS 清理先前 119 字段老表

压测方式迁移到 -backfill 能力,不再需要 workspace/stress_20260423/run_stress.sh
的 bash while 循环(原 workspace 保留不动,gitignore)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
tianyu.chu před 1 týdnem
rodič
revize
8e3bf68a4b

+ 103 - 0
tests/integration/datax/import/stress/hive_raw.sql

@@ -0,0 +1,103 @@
+-- Stress-test Hive raw table: 91 columns remain after trimming 28 sensitive/redundant columns.
+-- Kept aligned with the ini: tests/integration/datax/import/stress/raw_trd_card_group_order_info_inc_d.ini
+-- Note: the old table is dropped first so it cannot conflict with the earlier 119-column schema.
+-- NOTE(review): the table is EXTERNAL, so DROP presumably removes only the metastore entry and leaves files under LOCATION untouched - confirm stale 119-column files are cleaned up separately.
+DROP TABLE IF EXISTS test.raw_trd_card_group_order_info_inc_d;
+-- All 91 columns are declared STRING. NOTE(review): accounts_payable, card_price and act_price share one COMMENT text, and pre_un_shipped_num / pre_wait_shipped_num carry time-related COMMENT text - confirm against the source table.
+CREATE EXTERNAL TABLE IF NOT EXISTS test.raw_trd_card_group_order_info_inc_d (
+    id                             STRING COMMENT 'id',
+    group_info_id                  STRING COMMENT '组团信息id',
+    merchant_id                    STRING COMMENT '商家id',
+    user_id                        STRING COMMENT '用户id',
+    shipping_address_id            STRING COMMENT '收货地址id',
+    purchase_count                 STRING COMMENT '购买数量',
+    order_no                       STRING COMMENT '订单编码',
+    accounts_payable               STRING COMMENT '应付款',
+    actual_payment                 STRING COMMENT '实付款',
+    payment_type                   STRING COMMENT '支付方式-交易类型',
+    payment_time                   STRING COMMENT '支付时间',
+    coupon                         STRING COMMENT '优惠券',
+    discount                       STRING COMMENT '折扣',
+    status                         STRING COMMENT '订单状态',
+    remark                         STRING COMMENT '备注',
+    create_time                    STRING COMMENT '创建时间',
+    create_by                      STRING COMMENT '创建人',
+    update_time                    STRING COMMENT '更新时间',
+    update_by                      STRING COMMENT '更新人',
+    payment_status                 STRING COMMENT '交易状态',
+    payment_status_desc            STRING COMMENT '交易状态描述',
+    payment_success_time           STRING COMMENT '支付完成时间',
+    del_flg                        STRING COMMENT '删除标记:0=正常,1=删除',
+    curier_company                 STRING COMMENT '快递公司',
+    refund_fee                     STRING COMMENT '退款金额',
+    refund_time                    STRING COMMENT '退款时间',
+    anonymous                      STRING COMMENT '是否匿名',
+    pick_up_type                   STRING COMMENT '提货方式',
+    ship_time                      STRING COMMENT '发货时间',
+    refund_success_time            STRING COMMENT '退款成功时间',
+    refund_recv_accout             STRING COMMENT '退款入账账户',
+    refund_account                 STRING COMMENT '退款资金来源',
+    refund_request_source          STRING COMMENT '退款发起来源',
+    card_price                     STRING COMMENT '应付款',
+    act_price                      STRING COMMENT '应付款',
+    goods_price_json               STRING COMMENT '价格json',
+    payment_sub_type               STRING COMMENT '支付子分类',
+    team_first                     STRING COMMENT '买队优先队伍',
+    refuse_status                  STRING COMMENT '是否接受累积发货,0申请,1拒绝,2同意',
+    prop1                          STRING COMMENT '备用',
+    prop2                          STRING COMMENT '备用',
+    prop3                          STRING COMMENT '备用',
+    point                          STRING COMMENT '消耗积分',
+    order_type                     STRING COMMENT '订单类型',
+    trade_amount                   STRING COMMENT '订单交易金额',
+    refund_type                    STRING COMMENT '退款类型',
+    refund_reason                  STRING COMMENT '订单退换原因',
+    evaluation                     STRING COMMENT '订单评价',
+    user_refund_time               STRING COMMENT '退换申请时间',
+    refund_status                  STRING COMMENT '退款状态',
+    merchant_refund_reason         STRING COMMENT '商家拒绝原因',
+    point_deduct                   STRING COMMENT '积分抵扣金额',
+    shipping_cost                  STRING COMMENT '运费',
+    merchant_remark                STRING COMMENT '商家备注',
+    pay_record                     STRING COMMENT '是否重复支付:1=是',
+    order_sub_type                 STRING COMMENT '订单子类型',
+    give_user_code                 STRING COMMENT '赠与人',
+    give_order_id                  STRING COMMENT '赠与关联订单id',
+    read_flag                      STRING COMMENT '赠送未读0和1',
+    give_num                       STRING COMMENT '赠送个数',
+    invoice_id                     STRING COMMENT '发票记录id',
+    combination_no                 STRING COMMENT '拆分订单关联编号',
+    open_self                      STRING COMMENT '是否用户自己拆卡',
+    refund_desc                    STRING COMMENT '退款原因详细描述',
+    goods_allocate                 STRING COMMENT '卡密是否分配',
+    close_payment_status           STRING COMMENT '打款状态',
+    close_payment_time             STRING COMMENT '打款时间',
+    finished_time                  STRING COMMENT '订单结束时间',
+    expire_time                    STRING COMMENT '过期时间',
+    settlement_amount              STRING COMMENT '结算金额',
+    platform_coupon                STRING COMMENT '平台优惠券id',
+    platform_discount              STRING COMMENT '平台优惠劵折扣',
+    discount_amount                STRING COMMENT '折扣金额',
+    member_discount                STRING COMMENT '会员折扣',
+    shipping_free_id               STRING COMMENT '运费券id',
+    shipping_free_amount           STRING COMMENT '运费券金额',
+    discount_point                 STRING COMMENT '折扣积分',
+    un_shipped_num                 STRING COMMENT '精美卡片未发货数量',
+    pre_un_shipped_num             STRING COMMENT '拼豆订单提醒用户申请时间',
+    wait_shipped_num               STRING COMMENT '精美卡片等待发货数量',
+    pre_wait_shipped_num           STRING COMMENT '用户支付拼豆订单运费时间',
+    refuse_time                    STRING COMMENT '用户同意累计发货时间',
+    refuse_notice                  STRING COMMENT '累计发货通知提醒',
+    pickup_time                    STRING COMMENT '揽收时间',
+    waring_type                    STRING COMMENT '风险异常类型',
+    waring_status                  STRING COMMENT '风险异常状态',
+    point_type                     STRING COMMENT '使用积分类型',
+    delivery_end_time              STRING COMMENT '发货截止时间',
+    serve_status                   STRING COMMENT '订单业务状态',
+    self_pickup_time               STRING COMMENT '申请自提时间,24小时内有效',
+    act_discount                   STRING COMMENT '平台折扣'
+)
+COMMENT '压测用 raw 贴源表(裁剪 28 敏感/冗余字段后 91 字段)'
+PARTITIONED BY (dt STRING)
+STORED AS ORC
+LOCATION '/user/hive/warehouse/test.db/raw_trd_card_group_order_info_inc_d';

+ 32 - 0
tests/integration/datax/import/stress/raw_trd_card_group_order_info_inc_d.ini

@@ -0,0 +1,32 @@
+; Stress-test DataX ini: PG public.card_group_order_info -> Hive test.raw_trd_card_group_order_info_inc_d
+; 28 sensitive/redundant columns are trimmed (91 columns kept); rows are filtered by create_time
+; Data source: prd-poyee-aliyun (production instance, maintained by ops)
+; Trimmed columns: payment_num/prepay_id/payment_evid/payment_bank_type/shipping_address_linkname/
+;           shipping_address/shipping_address_phone/courier_num/out_refund_no/refund_id/
+;           trade_no/prop4/refund_curier_company/refund_courier_num/refund_ship_time/
+;           order_queue_low/order_queue_high/payment_appid/shipping_trade_no/pay_data/
+;           pay_config_id/pay_result_data/address_edit_json/app_user_nick_name/
+;           app_user_avatar/tenant_id/self_pickup_code/version
+; Note: key = value pairs are not column-aligned
+; The reader and writer sections below list the same 91 columns in the same order, and the reader filter is a half-open create_time range (start inclusive, stop exclusive).
+[reader]
+dataSource = postgresql/prd-poyee-aliyun
+database = hs_sync_data
+table = public.card_group_order_info
+column = id,group_info_id,merchant_id,user_id,shipping_address_id,purchase_count,order_no,accounts_payable,actual_payment,payment_type,payment_time,coupon,discount,status,remark,create_time,create_by,update_time,update_by,payment_status,payment_status_desc,payment_success_time,del_flg,curier_company,refund_fee,refund_time,anonymous,pick_up_type,ship_time,refund_success_time,refund_recv_accout,refund_account,refund_request_source,card_price,act_price,goods_price_json,payment_sub_type,team_first,refuse_status,prop1,prop2,prop3,point,order_type,trade_amount,refund_type,refund_reason,evaluation,user_refund_time,refund_status,merchant_refund_reason,point_deduct,shipping_cost,merchant_remark,pay_record,order_sub_type,give_user_code,give_order_id,read_flag,give_num,invoice_id,combination_no,open_self,refund_desc,goods_allocate,close_payment_status,close_payment_time,finished_time,expire_time,settlement_amount,platform_coupon,platform_discount,discount_amount,member_discount,shipping_free_id,shipping_free_amount,discount_point,un_shipped_num,pre_un_shipped_num,wait_shipped_num,pre_wait_shipped_num,refuse_time,refuse_notice,pickup_time,waring_type,waring_status,point_type,delivery_end_time,serve_status,self_pickup_time,act_discount
+columnType =
+where = create_time >= '${start_date}' AND create_time < '${stop_date}'
+querySql =
+splitPk = id
+fetchSize = 1000
+
+[writer]
+dataSource = hdfs/prd-hdfs-ha
+path = /user/hive/warehouse/test.db/raw_trd_card_group_order_info_inc_d/dt=${dt}/
+column = id,group_info_id,merchant_id,user_id,shipping_address_id,purchase_count,order_no,accounts_payable,actual_payment,payment_type,payment_time,coupon,discount,status,remark,create_time,create_by,update_time,update_by,payment_status,payment_status_desc,payment_success_time,del_flg,curier_company,refund_fee,refund_time,anonymous,pick_up_type,ship_time,refund_success_time,refund_recv_accout,refund_account,refund_request_source,card_price,act_price,goods_price_json,payment_sub_type,team_first,refuse_status,prop1,prop2,prop3,point,order_type,trade_amount,refund_type,refund_reason,evaluation,user_refund_time,refund_status,merchant_refund_reason,point_deduct,shipping_cost,merchant_remark,pay_record,order_sub_type,give_user_code,give_order_id,read_flag,give_num,invoice_id,combination_no,open_self,refund_desc,goods_allocate,close_payment_status,close_payment_time,finished_time,expire_time,settlement_amount,platform_coupon,platform_discount,discount_amount,member_discount,shipping_free_id,shipping_free_amount,discount_point,un_shipped_num,pre_un_shipped_num,wait_shipped_num,pre_wait_shipped_num,refuse_time,refuse_notice,pickup_time,waring_type,waring_status,point_type,delivery_end_time,serve_status,self_pickup_time,act_discount
+columnType =
+fileType = orc
+fileName = raw_trd_card_group_order_info_inc_d
+encoding = UTF-8
+writeMode = truncate
+fieldDelimiter = \t