-- Purpose: observe the actual default behavior of a dynamic-partition
-- INSERT OVERWRITE on Spark 2.4 when spark.sql.sources.partitionOverwriteMode
-- is NOT set.
-- How to run: see README.md in this directory.
--
-- NOTE(review): the table below is declared with Hive syntax (STORED AS ORC).
-- Presumably inserts into a partitioned Hive SerDe table do not go through the
-- data source write path that spark.sql.sources.partitionOverwriteMode
-- governs -- confirm this experiment exercises the intended code path.
-- NOTE(review): the all-dynamic INSERT below presumably requires
-- hive.exec.dynamic.partition.mode=nonstrict for a Hive SerDe table; verify
-- that the README / cluster configuration provides it.

CREATE DATABASE IF NOT EXISTS test;

DROP TABLE IF EXISTS test.idempotence_pom_default;

CREATE EXTERNAL TABLE test.idempotence_pom_default (
    id INT,
    val STRING
)
PARTITIONED BY (dt STRING)
STORED AS ORC
LOCATION '/user/hive/warehouse/test.db/idempotence_pom_default';

-- Seed five initial dt partitions, one row each, via static-partition overwrites.
INSERT OVERWRITE TABLE test.idempotence_pom_default PARTITION (dt = '20260501')
VALUES (1, 'init-501');

INSERT OVERWRITE TABLE test.idempotence_pom_default PARTITION (dt = '20260502')
VALUES (2, 'init-502');

INSERT OVERWRITE TABLE test.idempotence_pom_default PARTITION (dt = '20260503')
VALUES (3, 'init-503');

INSERT OVERWRITE TABLE test.idempotence_pom_default PARTITION (dt = '20260504')
VALUES (4, 'init-504');

INSERT OVERWRITE TABLE test.idempotence_pom_default PARTITION (dt = '20260505')
VALUES (5, 'init-505');

-- Checkpoint A: initial partition set (expected: 5 partitions).
SHOW PARTITIONS test.idempotence_pom_default;

-- Without setting spark.sql.sources.partitionOverwriteMode, run a
-- dynamic-partition INSERT OVERWRITE whose query yields only 2 dt values.
INSERT OVERWRITE TABLE test.idempotence_pom_default PARTITION (dt)
SELECT
    99 AS id,
    'rewritten-503' AS val,
    '20260503' AS dt
UNION ALL
SELECT
    100 AS id,
    'rewritten-504' AS val,
    '20260504' AS dt;

-- Checkpoint B: partition set after the dynamic overwrite.
SHOW PARTITIONS test.idempotence_pom_default;

-- Checkpoint C: where the data actually landed. Columns are listed explicitly
-- and ordered by (dt, id) so the output is stable across runs.
SELECT
    id,
    val,
    dt
FROM test.idempotence_pom_default
ORDER BY dt, id;