-- es_insert_smoke.sql
-- Tracking raw-layer INSERT smoke test (ES Spark SQL direct read → Hive ORC)
-- Run:  python3 bin/spark-sql-starter.py -f tests/integration/tracking/es_insert_smoke.sql
-- Prerequisites:
--   1. elasticsearch-spark-20_2.11-7.17.29.jar is deployed in the cluster's spark/jars/ (kb/01 §5)
--   2. Target table exists: python3 bin/spark-sql-starter.py -f tests/integration/tracking/ddl/hive_raw_es.sql
  6. CREATE TEMPORARY VIEW v_es_traces
  7. USING org.elasticsearch.spark.sql
  8. OPTIONS (
  9. 'es.nodes' = '192.168.33.21',
  10. 'es.port' = '9200',
  11. 'es.nodes.wan.only' = 'true',
  12. 'es.resource' = 'traces-2026-04-08'
  13. );
  14. INSERT OVERWRITE TABLE test.raw_usr_traces_apd_d_es PARTITION (dt='20260408')
  15. SELECT * FROM v_es_traces LIMIT 5000;
  16. SELECT COUNT(*) AS cnt FROM test.raw_usr_traces_apd_d_es WHERE dt='20260408';
  17. SELECT _class, anonymousId, event, time, properties.os, properties.params.groupId
  18. FROM test.raw_usr_traces_apd_d_es WHERE dt='20260408' LIMIT 3;