| 123456789101112131415161718192021222324252627282930313233343536373839404142434445 |
- import json
- import re
- from pyspark.sql.functions import udf
- from pyspark.sql.types import *
@udf(returnType=ArrayType(StringType()))
def str_to_arr(json_str: str) -> list:
    """Decode a JSON string for use as a Spark ``array<string>`` column.

    Args:
        json_str: JSON text to decode. May be ``None`` or empty.

    Returns:
        The value produced by ``json.loads`` for non-empty input, or an
        empty list when ``json_str`` is ``None`` or ``""``.

    Raises:
        json.JSONDecodeError: If ``json_str`` is non-empty but not valid JSON.
    """
    # NOTE(review): the decoded value is returned as-is; nothing here checks
    # that it is actually a list of strings - confirm against callers.
    return json.loads(json_str) if json_str else []
@udf(returnType=ArrayType(MapType(StringType(), StringType())))
def str_to_map_arr(json_str: str) -> list:
    """Decode a JSON string for use as a Spark ``array<map<string,string>>`` column.

    Args:
        json_str: JSON text to decode. May be ``None`` or empty.

    Returns:
        The value produced by ``json.loads`` for non-empty input, or an
        empty list when ``json_str`` is ``None`` or ``""``.

    Raises:
        json.JSONDecodeError: If ``json_str`` is non-empty but not valid JSON.
    """
    # Missing / empty input maps to an empty array rather than null.
    if not json_str:
        return []
    # NOTE(review): presumably the payload is a JSON array of flat objects;
    # the decoded value is not validated here - confirm against callers.
    decoded = json.loads(json_str)
    return decoded
def merge_ws(text: str):
    """Collapse every run of whitespace in ``text`` into a single space.

    Leading and trailing whitespace is dropped as a side effect of
    splitting on whitespace.

    Args:
        text: The string to normalise. May be ``None`` or empty.

    Returns:
        The normalised string, or ``None`` when ``text`` is ``None`` or
        ``""``. A whitespace-only string yields ``""``.
    """
    if not text:
        return None
    words = text.split()
    return ' '.join(words)
@udf(returnType=ArrayType(StringType()))
def explode_str_to_arr(text: str) -> list:
    """Expand a string into its prefixes of length 8 and above.

    Args:
        text: The string to expand. May be ``None``.

    Returns:
        * ``[]`` when ``text`` is ``None``.
        * ``[text]`` when ``text`` has 8 characters or fewer.
        * Otherwise every prefix of ``text`` from the full length down to
          8 characters, longest first.
    """
    if text is None:
        return []

    length = len(text)
    if length <= 8:
        return [text]

    # Longer than 8 characters: working backwards from the full length,
    # drop one trailing character at a time and collect each prefix.
    prefixes = []
    for end in range(length, 7, -1):
        prefixes.append(text[:end])
    return prefixes
def remove_special_char(text, char):
    """Strip one trailing occurrence of ``char`` from ``text``.

    The whole matched suffix is removed, so a multi-character ``char`` is
    stripped in full rather than losing only its last character.

    Args:
        text: The string to trim. ``None`` is returned unchanged.
        char: The suffix to remove: a string, or a tuple of candidate
            strings (the first one that matches is removed). An empty or
            ``None`` suffix leaves ``text`` unchanged.

    Returns:
        ``text`` without the matched trailing suffix, or ``text`` itself
        when nothing matches.
    """
    if text is None or not char:
        return text

    # str.endswith accepts either a single string or a tuple of strings;
    # normalise to a tuple so the matched suffix length is known.
    suffixes = char if isinstance(char, tuple) else (char,)
    for suffix in suffixes:
        # An empty suffix always "matches"; skip it so that nothing is cut.
        if suffix and text.endswith(suffix):
            return text[:-len(suffix)]
    return text
if __name__ == '__main__':
    # Manual smoke test for explode_str_to_arr.
    #
    # The function is wrapped by @udf, so calling it directly builds a Spark
    # column expression (the string argument is read as a column name) and
    # requires an active SparkContext. The wrapper exposes the undecorated
    # Python function as `.func`, which is what we exercise here.
    arr = explode_str_to_arr.func('fsdfsafas')
    # 9-character input -> ['fsdfsafas', 'fsdfsafa']
    print(arr)
|