-- Purpose: observe what a dynamic-partition INSERT OVERWRITE actually does on
-- Spark 2.4 when spark.sql.sources.partitionOverwriteMode is left unset.
-- See README.md in the same directory for how to run this script.

-- Hive dynamic-partition prerequisites. The fully dynamic INSERT further down
-- is documented as unable to run without them. They are unrelated to the
-- setting under test (spark.sql.sources.partitionOverwriteMode); they are only
-- a precondition of the underlying execution.
-- NOTE(review): set in-script so the run is self-contained; redundant but
-- harmless if the runner or cluster config already supplies them - confirm
-- against README.md.
SET hive.exec.dynamic.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict;

-- Fixture: recreate the partitioned ORC table from scratch.
-- NOTE(review): the table is EXTERNAL, so DROP TABLE presumably leaves files
-- from an earlier run under LOCATION - confirm the directory is clean before
-- re-running, otherwise stale files may surface in the final SELECT.
CREATE DATABASE IF NOT EXISTS test;

DROP TABLE IF EXISTS test.idempotence_pom_default;

CREATE EXTERNAL TABLE test.idempotence_pom_default (
    id  INT,
    val STRING
)
PARTITIONED BY (dt STRING)
STORED AS ORC
LOCATION '/user/hive/warehouse/test.db/idempotence_pom_default';

-- Seed five dt partitions, one row each, using static partition specs.
INSERT OVERWRITE TABLE test.idempotence_pom_default PARTITION (dt='20260501') VALUES (1, 'init-501');
INSERT OVERWRITE TABLE test.idempotence_pom_default PARTITION (dt='20260502') VALUES (2, 'init-502');
INSERT OVERWRITE TABLE test.idempotence_pom_default PARTITION (dt='20260503') VALUES (3, 'init-503');
INSERT OVERWRITE TABLE test.idempotence_pom_default PARTITION (dt='20260504') VALUES (4, 'init-504');
INSERT OVERWRITE TABLE test.idempotence_pom_default PARTITION (dt='20260505') VALUES (5, 'init-505');

-- Checkpoint A: partition set after seeding (5 partitions expected).
SHOW PARTITIONS test.idempotence_pom_default;

-- Experiment: with spark.sql.sources.partitionOverwriteMode still unset, run a
-- fully dynamic INSERT OVERWRITE whose query yields only two dt values.
-- The last SELECT column (dt) feeds the dynamic partition column.
INSERT OVERWRITE TABLE test.idempotence_pom_default PARTITION (dt)
SELECT 99 AS id, 'rewritten-503' AS val, '20260503' AS dt
UNION ALL
SELECT 100 AS id, 'rewritten-504' AS val, '20260504' AS dt;

-- Checkpoint B: partition set after the dynamic overwrite.
SHOW PARTITIONS test.idempotence_pom_default;

-- Checkpoint C: where the rows actually ended up. Columns are listed
-- explicitly and the result is ordered so the output is deterministic.
SELECT
    id,
    val,
    dt
FROM test.idempotence_pom_default
ORDER BY dt, id;