| 12345678910111213141516171819202122 |
- -- 埋点 raw 层 INSERT 冒烟(ES SparkSQL 直读 → Hive ORC)
- -- 跑:python3 bin/spark-sql-starter.py -f tests/integration/tracking/es_insert_smoke.sql
- -- 前置:
- -- 1. 集群 spark/jars/ 已挂 elasticsearch-spark-20_2.11-7.17.29.jar(kb/01 §5)
- -- 2. 已建表:python3 bin/spark-sql-starter.py -f tests/integration/tracking/ddl/hive_raw_es.sql
- -- Session-scoped view over the Elasticsearch resource traces-2026-04-08,
- -- read through the elasticsearch-spark SQL data source.
- -- NOTE(review): node address and index name are hard-coded for this smoke run.
- CREATE TEMPORARY VIEW v_es_traces
-     USING org.elasticsearch.spark.sql
-     OPTIONS (
-         'es.resource'       = 'traces-2026-04-08',
-         'es.nodes'          = '192.168.33.21',
-         'es.port'           = '9200',
-         'es.nodes.wan.only' = 'true'
-     );
- -- Overwrite partition dt=20260408 of the raw table with at most 5000 rows
- -- taken from the ES-backed view.
- -- NOTE(review): SELECT * presumably relies on the view columns matching the
- -- target table columns by position - confirm against the table DDL.
- INSERT OVERWRITE TABLE test.raw_usr_traces_apd_d_es
-     PARTITION (dt = '20260408')
- SELECT src.*
- FROM v_es_traces AS src
- LIMIT 5000;
- -- Row count of partition dt=20260408 in the raw table.
- SELECT COUNT(1) AS cnt
- FROM test.raw_usr_traces_apd_d_es
- WHERE dt = '20260408';
- -- Show three rows of partition dt=20260408, including two fields nested
- -- under the properties struct.
- SELECT
-     t._class,
-     t.anonymousId,
-     t.event,
-     t.time,
-     t.properties.os,
-     t.properties.params.groupId
- FROM test.raw_usr_traces_apd_d_es AS t
- WHERE t.dt = '20260408'
- LIMIT 3;
|