|
|
@@ -0,0 +1,37 @@
|
|
|
+-- 用途:验证 Spark 2.4 不设 spark.sql.sources.partitionOverwriteMode 时
|
|
|
+-- 动态分区 INSERT OVERWRITE 的实际默认行为。
|
|
|
+-- 跑法见同目录 README.md。
|
|
|
+
|
|
|
+-- Scratch database that holds the experiment table
+CREATE DATABASE IF NOT EXISTS test;
+
+-- Recreate the table definition on every run so the partition list starts empty
+DROP TABLE IF EXISTS test.idempotence_pom_default;
+
+-- Two data columns (id, val) plus one string partition column (dt), stored as ORC
+-- at an explicit location
+CREATE EXTERNAL TABLE test.idempotence_pom_default (
+    id  INT,
+    val STRING
+)
+PARTITIONED BY (dt STRING)
+STORED AS ORC
+LOCATION '/user/hive/warehouse/test.db/idempotence_pom_default';
|
|
|
+
|
|
|
+-- Seed five dt partitions, one row each, using static partition specs
+INSERT OVERWRITE TABLE test.idempotence_pom_default PARTITION (dt = '20260501')
+VALUES (1, 'init-501');
+
+INSERT OVERWRITE TABLE test.idempotence_pom_default PARTITION (dt = '20260502')
+VALUES (2, 'init-502');
+
+INSERT OVERWRITE TABLE test.idempotence_pom_default PARTITION (dt = '20260503')
+VALUES (3, 'init-503');
+
+INSERT OVERWRITE TABLE test.idempotence_pom_default PARTITION (dt = '20260504')
+VALUES (4, 'init-504');
+
+INSERT OVERWRITE TABLE test.idempotence_pom_default PARTITION (dt = '20260505')
+VALUES (5, 'init-505');
+
+-- Checkpoint A: initial partition set (5 partitions expected)
+SHOW PARTITIONS test.idempotence_pom_default;
|
|
|
+
|
|
|
+-- Print the effective overwrite mode so the output records what the session really ran with.
+-- This script never assigns spark.sql.sources.partitionOverwriteMode, but spark-defaults.conf
+-- or a --conf flag still could, which would silently invalidate the experiment.
+SET spark.sql.sources.partitionOverwriteMode;
+
+-- The INSERT below has no static partition column. For a Hive-format table that is rejected
+-- while hive.exec.dynamic.partition.mode is strict, so relax it explicitly instead of relying
+-- on the cluster configuration. This setting does not touch the overwrite mode under test.
+SET hive.exec.dynamic.partition.mode=nonstrict;
+
+-- NOTE(review): the table is declared with STORED AS ORC (Hive format), so this statement is
+-- presumably planned through the Hive insert path, which may not consult
+-- spark.sql.sources.partitionOverwriteMode at all. Confirm this is the path the experiment
+-- is meant to measure.
+
+-- Dynamic-partition INSERT OVERWRITE that produces rows for only 2 of the seeded dt values
+INSERT OVERWRITE TABLE test.idempotence_pom_default PARTITION (dt)
+SELECT 99 AS id, 'rewritten-503' AS val, '20260503' AS dt
+UNION ALL
+SELECT 100 AS id, 'rewritten-504' AS val, '20260504' AS dt;
|
|
|
+
|
|
|
+-- Checkpoint B: partition set after the dynamic overwrite
+SHOW PARTITIONS test.idempotence_pom_default;
+
+-- Checkpoint C: which rows ended up in which partition
+SELECT
+    id,
+    val,
+    dt
+FROM test.idempotence_pom_default
+ORDER BY dt, id;
|