| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970 |
- from dw_base.spark.udf.enterprise.ent_clean_name_logistics import clean_company_name
# Keywords that mark a company name as logistics-related.
# is_logistic_match() cleans each entry with clean_company_name(), splits it on
# whitespace, and requires every resulting token to appear as a whole token in
# the cleaned company name, so multi-word entries match regardless of word order.
LOGISTIC_MATCH = [
    "AIR & SEA",
    "AIR + OCEAN",
    "APEX",
    "C.H. ROBINSON",
    "CARGO",
    "CONTAINER",
    "DELIVERY",
    "DHL",
    "EXPEDITORS",
    "EXPRESS",
    "FEDEX",
    "FORWARD",
    "FORWARDER",
    "FORWARDING",
    "FREIGHT",
    "KUEHNE NAGEL",
    "LINE",
    "LINES",
    "LOGISTIC",
    "LOGISTICAL",
    "LOGISTICS",
    "MAERSK",
    "OOCL",
    "ORDER",
    "SCHENKER",
    "SHIP",
    "SHIPPING",
    "SUPPLY CHAIN",
    "TRANSPORT",
    "TRANSPORTATION",
    "LOGISTICOS",
    "TRANSPORTES",
    "NVOCC",
    "AIR AND SEA",
    "AIR SEA",
    "AIRSEA",
    "DSV AIR SEA",
    "LOGISTICĂ",
    "LOJISTIK",
]

# Cleaned company names that contain a keyword above but must NOT be treated as
# logistics companies; compared for exact equality against the cleaned name.
REMOVE_LOGISTIC_MATCH = ["VISAGE LINES PERSONAL CARE PRIVATE LIMITED"]
def contains_all_tokens(source_tokens, target_tokens):
    """Return True when every token in ``target_tokens`` occurs in ``source_tokens``.

    Tokens are compared by equality as whole items; order and repetition are
    ignored. An empty ``target_tokens`` is vacuously contained, so the result
    is True in that case.
    """
    available = set(source_tokens)
    return available.issuperset(target_tokens)
def is_logistic_match(name):
    """Return True when ``name`` looks like a logistics company name.

    The name is normalised with ``clean_company_name`` and split on whitespace.
    It matches when, for at least one entry of ``LOGISTIC_MATCH``, every token
    of the (equally normalised) entry appears as a whole token in the name.

    Two exclusions override a keyword match:
      * the cleaned name contains the substring ``'CONTAINER BAG'``;
      * the cleaned name is exactly one of ``REMOVE_LOGISTIC_MATCH``.

    Args:
        name: Raw company name, passed unchanged to ``clean_company_name``.

    Returns:
        bool: True for a logistics match, False otherwise (including when the
        cleaned name is empty or None).
    """
    company_name = clean_company_name(name)
    if not company_name:
        # Nothing usable came back from the cleaner, so no keyword can match.
        return False

    # The exclusions do not depend on which keyword matched; decide them once
    # up front instead of re-checking inside the keyword loop.
    if 'CONTAINER BAG' in company_name or company_name in REMOVE_LOGISTIC_MATCH:
        return False

    # Build the lookup set once rather than once per keyword.
    name_tokens = set(company_name.split())
    for keyword in LOGISTIC_MATCH:
        keyword_tokens = (clean_company_name(keyword) or '').split()
        # A keyword that cleans down to no tokens would be vacuously
        # "contained" in every name; skip it instead of matching everything.
        if keyword_tokens and name_tokens.issuperset(keyword_tokens):
            return True
    return False
if __name__ == '__main__':
    # Manual smoke check: 'ALINE' merely embeds the keyword 'LINE'; matching is
    # on whole tokens, so this is presumably expected to print False (depends
    # on what clean_company_name does to the input).
    sample_name = 'ALINE'
    print(is_logistic_match(sample_name))
|