-- Tracking raw-layer INSERT smoke test (ES Spark SQL direct read -> Hive ORC).
-- Run: python3 bin/spark-sql-starter.py -f tests/integration/tracking/es_insert_smoke.sql
-- Prerequisites:
--   1. elasticsearch-spark-20_2.11-7.17.29.jar is deployed under the cluster's spark/jars/ (kb/01 §5)
--   2. Target table exists: python3 bin/spark-sql-starter.py -f tests/integration/tracking/ddl/hive_raw_es.sql
--
-- NOTE(fix): the committed file had lost its newlines, so the leading `--`
-- comment consumed every statement and nothing executed. Statement structure
-- restored; the SQL itself is unchanged.

-- Expose one day's ES trace index as a temporary Spark SQL view.
-- wan.only=true: the driver/executors reach ES only via the listed node
-- (no data-node discovery), required when ES sits behind a NAT/VPN boundary.
CREATE TEMPORARY VIEW v_es_traces
USING org.elasticsearch.spark.sql
OPTIONS (
    'es.nodes'          = '192.168.33.21',
    'es.port'           = '9200',
    'es.nodes.wan.only' = 'true',
    'es.resource'       = 'traces-2026-04-08'
);

-- Load a bounded sample into the partitioned raw table.
-- Nested ES objects (lib, properties) are flattened to JSON strings and
-- numeric epoch fields are stringified to match the raw-layer STRING schema.
-- LIMIT without ORDER BY is intentional: any 5000 rows suffice for a smoke run.
INSERT OVERWRITE TABLE test.raw_usr_traces_apd_d_es PARTITION (dt='20260408')
SELECT
    _class,
    anonymousId,
    distinctId,
    event,
    CAST(flushTime AS STRING)      AS flushTime,
    to_json(lib)                   AS lib,
    loginId,
    CAST(productInstock AS STRING) AS productInstock,
    to_json(properties)            AS properties,
    CAST(`time` AS STRING)         AS `time`,
    type
FROM v_es_traces
LIMIT 5000;

-- Smoke check 1: partition row count (expect > 0 and <= 5000).
SELECT COUNT(*) AS cnt
FROM test.raw_usr_traces_apd_d_es
WHERE dt='20260408';

-- Smoke check 2: spot-check a few rows, including nested-JSON extraction
-- from the flattened `properties` column.
SELECT
    _class,
    anonymousId,
    event,
    `time`,
    get_json_object(properties, '$.os')             AS os,
    get_json_object(properties, '$.params.groupId') AS group_id
FROM test.raw_usr_traces_apd_d_es
WHERE dt='20260408'
LIMIT 3;