ent_logistics_label.py 1.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970
  1. from dw_base.spark.udf.enterprise.ent_clean_name_logistics import clean_company_name
  # Keywords that mark a company name as logistics-related.
  # is_logistic_match() normalises each entry with clean_company_name(), splits
  # it on whitespace, and reports a match when the cleaned company name contains
  # every resulting token (as whole tokens, in any order).
  LOGISTIC_MATCH = [
      "AIR & SEA",
      "AIR + OCEAN",
      "APEX",
      "C.H. ROBINSON",
      "CARGO",
      "CONTAINER",
      "DELIVERY",
      "DHL",
      "EXPEDITORS",
      "EXPRESS",
      "FEDEX",
      "FORWARD",
      "FORWARDER",
      "FORWARDING",
      "FREIGHT",
      "KUEHNE NAGEL",
      "LINE",
      "LINES",
      "LOGISTIC",
      "LOGISTICAL",
      "LOGISTICS",
      "MAERSK",
      "OOCL",
      "ORDER",
      "SCHENKER",
      "SHIP",
      "SHIPPING",
      "SUPPLY CHAIN",
      "TRANSPORT",
      "TRANSPORTATION",
      "LOGISTICOS",
      "TRANSPORTES",
      "NVOCC",
      "AIR AND SEA",
      "AIR SEA",
      "AIRSEA",
      "DSV AIR SEA",
      "LOGISTICĂ",
      "LOJISTIK",
  ]

  # Company names that must never be labelled as logistics even though they
  # contain one of the keywords above.  is_logistic_match() compares these for
  # exact equality against the *cleaned* company name.
  REMOVE_LOGISTIC_MATCH = ["VISAGE LINES PERSONAL CARE PRIVATE LIMITED"]
  def contains_all_tokens(source_tokens, target_tokens):
      """Return True when every token in *target_tokens* occurs in *source_tokens*.

      Tokens are compared for exact equality; order and repetition are
      ignored.  An empty *target_tokens* yields True, because there is no
      token that could be missing.

      Args:
          source_tokens: Iterable of hashable tokens to search in.
          target_tokens: Iterable of tokens that must all be present.

      Returns:
          bool: True if no target token is missing from the source tokens.
      """
      # Build the lookup set once so each membership test is O(1).
      source_set = set(source_tokens)
      return all(token in source_set for token in target_tokens)
  def is_logistic_match(name):
      """Return True when *name* looks like a logistics company name.

      The name is normalised with clean_company_name() and split on
      whitespace.  It matches when, for at least one entry of LOGISTIC_MATCH,
      every token of the (equally normalised) entry appears among the name's
      tokens.  Matching is on whole tokens, so a keyword that is only a
      substring of a longer token does not count.

      Two exclusions override any keyword hit:
        * the cleaned name contains the substring 'CONTAINER BAG';
        * the cleaned name equals an entry of REMOVE_LOGISTIC_MATCH.

      Args:
          name: Raw company name, passed to clean_company_name() as-is.

      Returns:
          bool: True for a logistics match, otherwise False.
      """
      company_name = clean_company_name(name)
      # clean_company_name is defined elsewhere; if it yields an empty (or
      # None) result there is nothing to match, so answer False instead of
      # failing on .split().
      if not company_name:
          return False

      # The exclusions do not depend on which keyword matched, so evaluate
      # them once up front.  The result is unchanged: an excluded name returns
      # False whether or not any keyword would have hit.
      if 'CONTAINER BAG' in company_name or company_name in REMOVE_LOGISTIC_MATCH:
          return False

      # Build the token set once rather than once per keyword.
      name_tokens = set(company_name.split())
      for logistic_match in LOGISTIC_MATCH:
          logistic_tokens = clean_company_name(logistic_match).split()
          # Skip a keyword that normalises to no tokens: an empty token list
          # is trivially "contained" and would make every name match.
          if logistic_tokens and name_tokens.issuperset(logistic_tokens):
              return True
      return False
  if __name__ == '__main__':
      # Manual smoke check: 'ALINE' only contains 'LINE' as a substring, and
      # matching is done on whole tokens, so this is expected to print False
      # (provided clean_company_name keeps 'ALINE' as a single token).
      print(is_logistic_match('ALINE'))