from dw_base.spark.udf.enterprise.ent_clean_name_logistics import clean_company_name

# Keywords that mark a company name as logistics-related. Each entry is passed
# through ``clean_company_name`` and split on whitespace before matching, so a
# multi-word entry matches when ALL of its tokens occur in the name (in any
# order, not necessarily adjacent).
LOGISTIC_MATCH = [
    "AIR & SEA",
    "AIR + OCEAN",
    "APEX",
    "C.H. ROBINSON",
    "CARGO",
    "CONTAINER",
    "DELIVERY",
    "DHL",
    "EXPEDITORS",
    "EXPRESS",
    "FEDEX",
    "FORWARD",
    "FORWARDER",
    "FORWARDING",
    "FREIGHT",
    "KUEHNE NAGEL",
    "LINE",
    "LINES",
    "LOGISTIC",
    "LOGISTICAL",
    "LOGISTICS",
    "MAERSK",
    "OOCL",
    "ORDER",
    "SCHENKER",
    "SHIP",
    "SHIPPING",
    "SUPPLY CHAIN",
    "TRANSPORT",
    "TRANSPORTATION",
    "LOGISTICOS",
    "TRANSPORTES",
    "NVOCC",
    "AIR AND SEA",
    "AIR SEA",
    "AIRSEA",
    "DSV AIR SEA",
    "LOGISTICĂ",
    'LOJISTIK',
]

# Cleaned company names that must never be reported as logistics even though
# they contain a keyword from LOGISTIC_MATCH (compared by exact equality).
REMOVE_LOGISTIC_MATCH = ['VISAGE LINES PERSONAL CARE PRIVATE LIMITED']

# Lazily built cache of the tokenized keywords; see _logistic_keyword_tokens().
_LOGISTIC_KEYWORD_TOKENS = None


def contains_all_tokens(source_tokens, target_tokens):
    """Return True when every token of *target_tokens* occurs in *source_tokens*.

    Order and multiplicity are ignored. An empty *target_tokens* yields True.
    """
    return set(source_tokens).issuperset(target_tokens)


def _logistic_keyword_tokens():
    """Return the cleaned, tokenized form of every LOGISTIC_MATCH keyword.

    The result is computed on first use and reused afterwards, so the keywords
    are not re-cleaned on every call of ``is_logistic_match``. This assumes
    ``clean_company_name`` is deterministic and that LOGISTIC_MATCH is not
    mutated after the first call.

    Keywords that clean down to zero tokens are dropped: an empty token list
    is vacuously contained in every name and would make every company match.
    """
    global _LOGISTIC_KEYWORD_TOKENS
    if _LOGISTIC_KEYWORD_TOKENS is None:
        tokenized = (clean_company_name(keyword).split() for keyword in LOGISTIC_MATCH)
        _LOGISTIC_KEYWORD_TOKENS = tuple(tuple(tokens) for tokens in tokenized if tokens)
    return _LOGISTIC_KEYWORD_TOKENS


def is_logistic_match(name):
    """Return True when *name* looks like a logistics company name.

    The name is normalized with ``clean_company_name`` and split into
    whitespace-separated tokens. It matches when all tokens of at least one
    LOGISTIC_MATCH keyword are present, unless the cleaned name contains the
    substring 'CONTAINER BAG' or equals an entry of REMOVE_LOGISTIC_MATCH.
    """
    company_name = clean_company_name(name)

    # The exclusions depend only on the name, so decide them once up front
    # instead of re-checking them inside the keyword scan.
    if 'CONTAINER BAG' in company_name:
        return False
    if company_name in REMOVE_LOGISTIC_MATCH:
        return False

    name_tokens = company_name.split()
    return any(
        contains_all_tokens(name_tokens, keyword_tokens)
        for keyword_tokens in _logistic_keyword_tokens()
    )


if __name__ == '__main__':
    # Ad-hoc check: 'ALINE' contains 'LINE' only as a substring, not as a whole
    # token (presumably prints False; depends on clean_company_name).
    print(is_logistic_match('ALINE'))