-- partition_overwrite_default.sql
  1. -- 用途:验证 Spark 2.4 不设 spark.sql.sources.partitionOverwriteMode 时
  2. -- 动态分区 INSERT OVERWRITE 的实际默认行为。
  3. -- 跑法见同目录 README.md。
  4. CREATE DATABASE IF NOT EXISTS test;
  5. DROP TABLE IF EXISTS test.idempotence_pom_default;
  6. CREATE EXTERNAL TABLE test.idempotence_pom_default (
  7. id INT,
  8. val STRING
  9. )
  10. PARTITIONED BY (dt STRING)
  11. STORED AS ORC
  12. LOCATION '/user/hive/warehouse/test.db/idempotence_pom_default';
  13. -- 灌初始 5 个 dt
  14. INSERT OVERWRITE TABLE test.idempotence_pom_default PARTITION (dt='20260501') VALUES (1, 'init-501');
  15. INSERT OVERWRITE TABLE test.idempotence_pom_default PARTITION (dt='20260502') VALUES (2, 'init-502');
  16. INSERT OVERWRITE TABLE test.idempotence_pom_default PARTITION (dt='20260503') VALUES (3, 'init-503');
  17. INSERT OVERWRITE TABLE test.idempotence_pom_default PARTITION (dt='20260504') VALUES (4, 'init-504');
  18. INSERT OVERWRITE TABLE test.idempotence_pom_default PARTITION (dt='20260505') VALUES (5, 'init-505');
  19. -- 关注点 A:初始分区集合(期望 5 个)
  20. SHOW PARTITIONS test.idempotence_pom_default;
  21. -- 不设 spark.sql.sources.partitionOverwriteMode,跑动态分区 INSERT OVERWRITE 只产 2 个 dt
  22. INSERT OVERWRITE TABLE test.idempotence_pom_default PARTITION (dt)
  23. SELECT 99 AS id, 'rewritten-503' AS val, '20260503' AS dt
  24. UNION ALL
  25. SELECT 100 AS id, 'rewritten-504' AS val, '20260504' AS dt;
  26. -- 关注点 B:跑完后分区集合
  27. SHOW PARTITIONS test.idempotence_pom_default;
  28. -- 关注点 C:实际数据落在哪
  29. SELECT * FROM test.idempotence_pom_default ORDER BY dt, id;