Procházet zdrojové kódy

test(tracking): 加 ES 直读 INSERT 冒烟 DDL + SQL(全 string + struct 嵌套)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
tianyu.chu před 1 dnem
rodič
revize
51bf3d4a1d

+ 283 - 0
tests/integration/tracking/ddl/hive_raw_es.sql

@@ -0,0 +1,283 @@
+-- Tracking (event instrumentation) raw layer: smoke test for the ES SparkSQL direct-read path.
+-- The table structure mirrors the v_es_traces schema 1:1: every leaf field is STRING and
+-- the nested STRUCTs (lib, properties, properties.params) are preserved.
+-- Temporary table name with the _es suffix; it coexists with the existing single-field table
+-- test.raw_usr_traces_apd_d and is to be consolidated in the next round once the smoke test passes.
+--
+-- Layout: external table, partitioned by dt (STRING, yyyymmdd), stored as ORC at the explicit
+-- LOCATION below.
+--
+-- NOTE(review): properties.params declares both `Duration` and `duration`, which differ only
+-- by case. Hive and Spark (with the default spark.sql.caseSensitive=false) resolve field names
+-- case-insensitively, so these two may collide or become ambiguous when referenced by name.
+-- Confirm on the target cluster before relying on either field.
+-- NOTE(review): spellings such as buySouce / collectSouce / shopSouce presumably mirror the
+-- ES mapping as-is; verify against the index mapping before "correcting" them.
+
+CREATE EXTERNAL TABLE IF NOT EXISTS test.raw_usr_traces_apd_d_es (
+    _class STRING,
+    anonymousId STRING,
+    distinctId STRING,
+    event STRING,
+    flushTime STRING,
+    lib STRUCT<
+        appVersion: STRING,
+        lib: STRING,
+        libDetail: STRING,
+        libMethod: STRING,
+        libVersion: STRING
+    >,
+    loginId STRING,
+    productInstock STRING,
+    properties STRUCT<
+        appId: STRING,
+        appName: STRING,
+        appVersion: STRING,
+        brand: STRING,
+        carrier: STRING,
+        deviceId: STRING,
+        eventDuration: STRING,
+        isFirstDay: STRING,
+        isFirstTime: STRING,
+        isSubAccount: STRING,
+        lib: STRING,
+        libMethod: STRING,
+        libVersion: STRING,
+        manufacturer: STRING,
+        merchantId: STRING,
+        model: STRING,
+        networkType: STRING,
+        os: STRING,
+        osVersion: STRING,
+        params: STRUCT<
+            CalendarProductClick: STRING,
+            CalendarProductLikes: STRING,
+            CalendarProductShare: STRING,
+            CalendarProductView: STRING,
+            CalendarView: STRING,
+            ContentClick: STRING,
+            ContentDetail: STRING,
+            ContentSearchClick: STRING,
+            ContentSearchRequest: STRING,
+            ContentSearchResultClick: STRING,
+            ContentShare: STRING,
+            ContentView: STRING,
+            Duration: STRING,
+            OtherIconClick: STRING,
+            _class: STRING,
+            activityEachpoint: STRING,
+            activityExchangeNumber: STRING,
+            activityId: STRING,
+            activityName: STRING,
+            activityNumber: STRING,
+            adId: STRING,
+            adName: STRING,
+            adType: STRING,
+            adUrl: STRING,
+            addetailDuration: STRING,
+            bannerId: STRING,
+            bannerName: STRING,
+            bannerSort: STRING,
+            bannerSource: STRING,
+            bannerUrl: STRING,
+            bannerdetailDuration: STRING,
+            btnParam: STRING,
+            btnType: STRING,
+            buySouce: STRING,
+            calendarDuration: STRING,
+            calendarLikesSource: STRING,
+            calendarLikesStatus: STRING,
+            calendarName: STRING,
+            calendarProductDuration: STRING,
+            calendarProductId: STRING,
+            calendarProductListId: STRING,
+            calendarProductName: STRING,
+            calendarProductSource: STRING,
+            calendarSource: STRING,
+            cardName: STRING,
+            cardQuantity: STRING,
+            cardreportSouce: STRING,
+            clickType: STRING,
+            collectSouce: STRING,
+            contentDuration: STRING,
+            contentId: STRING,
+            contentMediaType: STRING,
+            contentSource: STRING,
+            contentStyleType: STRING,
+            contentTitle: STRING,
+            contentType: STRING,
+            couponAmount: STRING,
+            couponDetailsSource: STRING,
+            couponExchangeSource: STRING,
+            couponId: STRING,
+            couponName: STRING,
+            couponSource: STRING,
+            couponTable: STRING,
+            couponType: STRING,
+            duration: STRING,
+            entrySpecialEffectsSource: STRING,
+            exchangeIntegral: STRING,
+            expressName: STRING,
+            expressNo: STRING,
+            failReason: STRING,
+            followSouce: STRING,
+            freecardSouce: STRING,
+            freightAmount: STRING,
+            freightCouponAmount: STRING,
+            freightCouponExchangeSource: STRING,
+            freightCouponId: STRING,
+            freightCouponName: STRING,
+            freightCouponSource: STRING,
+            freightDetailsSource: STRING,
+            friendCode: STRING,
+            groupBuyQuantity: STRING,
+            groupCode: STRING,
+            groupDetailSouce: STRING,
+            groupDisplay: STRING,
+            groupId: STRING,
+            groupManufacturer: STRING,
+            groupName: STRING,
+            groupNumSpecifications: STRING,
+            groupPlay: STRING,
+            groupPrice: STRING,
+            groupProgress: STRING,
+            groupQuantity: STRING,
+            groupSets: STRING,
+            groupSource: STRING,
+            groupSpecifications: STRING,
+            groupSport: STRING,
+            groupStartTime: STRING,
+            groupState: STRING,
+            groupStoreSource: STRING,
+            groupType: STRING,
+            groupYear: STRING,
+            groupdetailDuration: STRING,
+            grouporderStatusName: STRING,
+            historyCouponTable: STRING,
+            homepageType: STRING,
+            isClickAllon: STRING,
+            isExchangeSuccess: STRING,
+            isFollowAnchor: STRING,
+            isFollowShop: STRING,
+            isLoginSuccess: STRING,
+            isNewRegister: STRING,
+            isOpenIcon: STRING,
+            isOpenSpecialEffects: STRING,
+            isSuccess: STRING,
+            isUseCoupon: STRING,
+            isUseStoreCoupon: STRING,
+            keyWord: STRING,
+            latLng: STRING,
+            levelIconSource: STRING,
+            lightActivityId: STRING,
+            lightActivityName: STRING,
+            limitedSource: STRING,
+            liveReplaySource: STRING,
+            log: STRING,
+            logType: STRING,
+            loginMethod: STRING,
+            loginSource: STRING,
+            luckybagId: STRING,
+            luckybagName: STRING,
+            luckybagResult: STRING,
+            mallOrderAmount: STRING,
+            mallorderStatusName: STRING,
+            menuCategory: STRING,
+            menuId: STRING,
+            menuName: STRING,
+            messagetypeName: STRING,
+            onsaleClickname: STRING,
+            onsaleQuantity: STRING,
+            orderActualAmount: STRING,
+            orderAmount: STRING,
+            orderId: STRING,
+            orderNo: STRING,
+            orderTotalQuantity: STRING,
+            paniniBaseId: STRING,
+            payResult: STRING,
+            payType: STRING,
+            paymentCallBack: STRING,
+            paymentChannel: STRING,
+            paymentMethod: STRING,
+            paymentSerialNumber: STRING,
+            playerId: STRING,
+            playerName: STRING,
+            pointsMallDiscountSource: STRING,
+            positionNumber: STRING,
+            productActivityType: STRING,
+            productBuyQuantity: STRING,
+            productCategory: STRING,
+            productCode: STRING,
+            productDetailSouce: STRING,
+            productDisplay: STRING,
+            productId: STRING,
+            productInstock: STRING,
+            productIntegral: STRING,
+            productName: STRING,
+            productNormalType: STRING,
+            productPrice: STRING,
+            productQuantity: STRING,
+            productReferenceprice: STRING,
+            productRemarks: STRING,
+            productRules: STRING,
+            productShoptype: STRING,
+            productSku: STRING,
+            productSkuBuyQuantity: STRING,
+            productSkuIntegral: STRING,
+            productSkuName: STRING,
+            productSkuPrice: STRING,
+            productSource: STRING,
+            productSpu: STRING,
+            productState: STRING,
+            productType: STRING,
+            productdetailDuration: STRING,
+            recId: STRING,
+            receiverAddress: STRING,
+            receiverArea: STRING,
+            receiverName: STRING,
+            receiverTelephone: STRING,
+            refAdId: STRING,
+            resultNumber: STRING,
+            roomId: STRING,
+            roomName: STRING,
+            roomSource: STRING,
+            roomType: STRING,
+            roomtypeName: STRING,
+            ruleSource: STRING,
+            scanKeyword: STRING,
+            scanResultNumber: STRING,
+            scanSouce: STRING,
+            screenshotId: STRING,
+            screenshotName: STRING,
+            screenshotSource: STRING,
+            searchSource: STRING,
+            shareSource: STRING,
+            shareType: STRING,
+            shopId: STRING,
+            shopName: STRING,
+            shopSouce: STRING,
+            showType: STRING,
+            slideUnlockQuantity: STRING,
+            specificationsClickname: STRING,
+            status: STRING,
+            storeCouponAmount: STRING,
+            storeCouponId: STRING,
+            storeCouponName: STRING,
+            storeId: STRING,
+            storeName: STRING,
+            toPath: STRING,
+            transportationCosts: STRING,
+            typeName: STRING,
+            userCode: STRING,
+            userType: STRING
+        >,
+        platform: STRING,
+        resumeFromBackground: STRING,
+        screenHeight: STRING,
+        screenWidth: STRING,
+        systemType: STRING,
+        timezoneOffset: STRING,
+        title: STRING,
+        userId: STRING,
+        userLevel: STRING,
+        userName: STRING,
+        userRole: STRING,
+        webUserName: STRING,
+        wgtVersion: STRING,
+        wifi: STRING
+    >,
+    time STRING,
+    type STRING
+)
+COMMENT '埋点贴源表(ES SparkSQL 直读冒烟,全 string)'
+PARTITIONED BY (dt STRING COMMENT 'yyyymmdd')
+STORED AS ORC
+LOCATION '/user/hive/warehouse/test.db/raw_usr_traces_apd_d_es';

+ 22 - 0
tests/integration/tracking/es_insert_smoke.sql

@@ -0,0 +1,22 @@
+-- Smoke test: INSERT into the tracking raw layer (ES read directly via SparkSQL -> Hive ORC).
+-- Run: python3 bin/spark-sql-starter.py -f tests/integration/tracking/es_insert_smoke.sql
+-- Prerequisites:
+--   1. elasticsearch-spark-20_2.11-7.17.29.jar is present under the cluster's spark/jars/ (kb/01 §5)
+--   2. Target table already created: python3 bin/spark-sql-starter.py -f tests/integration/tracking/ddl/hive_raw_es.sql
+
+-- Temporary view over the ES index traces-2026-04-08, read through the
+-- org.elasticsearch.spark.sql data source. The index date here and the dt partition
+-- value used below (20260408) refer to the same day; change them together.
+CREATE TEMPORARY VIEW v_es_traces
+USING org.elasticsearch.spark.sql
+OPTIONS (
+  'es.nodes' = '192.168.33.21',
+  'es.port' = '9200',
+  'es.nodes.wan.only' = 'true',
+  'es.resource' = 'traces-2026-04-08'
+);
+
+-- Overwrite partition dt='20260408' with at most 5000 rows from the view.
+-- INSERT ... SELECT maps columns by position, not by name, so the columns are listed
+-- explicitly in the target table's declared order instead of SELECT *. That keeps a
+-- reordered or newly added top-level ES field from silently landing in the wrong column.
+-- LIMIT without ORDER BY: which 5000 rows are picked is not deterministic; that is
+-- acceptable here because the script only checks that the load path works.
+INSERT OVERWRITE TABLE test.raw_usr_traces_apd_d_es PARTITION (dt='20260408')
+SELECT
+    _class,
+    anonymousId,
+    distinctId,
+    event,
+    flushTime,
+    lib,
+    loginId,
+    productInstock,
+    properties,
+    time,
+    type
+FROM v_es_traces
+LIMIT 5000;
+
+-- Check 1: row count of the freshly written partition (bounded by the LIMIT above).
+SELECT COUNT(*) AS cnt
+FROM test.raw_usr_traces_apd_d_es
+WHERE dt='20260408';
+
+-- Check 2: sample a few rows, touching top-level, struct and nested-struct fields.
+SELECT
+    _class,
+    anonymousId,
+    event,
+    time,
+    properties.os,
+    properties.params.groupId
+FROM test.raw_usr_traces_apd_d_es
+WHERE dt='20260408'
+LIMIT 3;