"""Spark UDFs and plain-Python helpers for JSON parsing and text clean-up."""
import json
import re
from typing import Optional

from pyspark.sql.functions import udf
from pyspark.sql.types import *

# Shortest prefix length emitted by ``explode_str_to_arr``; strings of this
# length or shorter are returned as a single-element list.
_MIN_PREFIX_LEN = 8


def _loads_or_empty(json_str):
    """Decode *json_str* with ``json.loads``; return ``[]`` for None or ''.

    Malformed JSON is not caught here: ``json.loads`` raises, and the error
    propagates to the caller.
    """
    if json_str:
        return json.loads(json_str)
    return []


def _prefixes(text, min_len=_MIN_PREFIX_LEN):
    """Return the prefixes of *text*, longest first, down to *min_len* chars.

    ``None`` yields ``[]``; a string of at most *min_len* characters yields
    ``[text]`` unchanged.
    """
    if text is None:
        return []
    if len(text) <= min_len:
        return [text]
    # Longer than min_len: walk from the full string backwards, dropping one
    # trailing character per step, and collect every prefix.
    return [text[:end] for end in range(len(text), min_len - 1, -1)]


@udf(returnType=ArrayType(StringType()))
def str_to_arr(json_str: str) -> list:
    """Spark UDF: parse a JSON string, declared to Spark as array<string>.

    Returns an empty list when the input is None or empty.
    """
    return _loads_or_empty(json_str)


@udf(returnType=ArrayType(MapType(StringType(), StringType())))
def str_to_map_arr(json_str: str) -> list:
    """Spark UDF: parse a JSON string, declared as array<map<string,string>>.

    Returns an empty list when the input is None or empty.
    """
    return _loads_or_empty(json_str)


def merge_ws(text: str) -> Optional[str]:
    """Collapse every whitespace run in *text* to one space and trim the ends.

    Returns None when *text* is None or empty.
    """
    if text:
        return ' '.join(text.split())
    return None


@udf(returnType=ArrayType(StringType()))
def explode_str_to_arr(text: str) -> list:
    """Spark UDF: expand *text* into its prefixes of length >= 8.

    See ``_prefixes`` for the exact rules (None -> [], short text -> [text]).
    """
    return _prefixes(text)


def remove_special_char(text, char):
    """Strip one trailing occurrence of *char* from *text*.

    *char* may be a single suffix string or a tuple of candidate suffixes (as
    accepted by ``str.endswith``); the first candidate that matches is removed
    in full. *text* is returned unchanged when it is None, when *char* is
    empty/None, or when no candidate matches.
    """
    if text is None or not char:
        return text
    candidates = (char,) if isinstance(char, str) else tuple(char)
    for suffix in candidates:
        # An empty suffix "matches" every string; skip it so nothing is cut.
        if suffix and text.endswith(suffix):
            return text[:-len(suffix)]
    return text


if __name__ == '__main__':
    # The decorated ``explode_str_to_arr`` is a Spark UDF wrapper; calling it
    # with a plain string would go through Spark instead of returning a list,
    # so the demo exercises the underlying pure-Python helper.
    arr = _prefixes('fsdfsafas')
    print(arr)