inflect_udf.py 2.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576
  1. # encoding: utf8
  2. import inflect
  3. # 创建inflect引擎实例
  4. p = inflect.engine()
  5. # 自定义单复数词表
  6. p.defnoun("appendix", "appendices")
  7. p.defnoun("bus", "buses")
  8. p.defnoun("thesis", "theses")
  9. p.defnoun("index", "indices")
  10. p.defnoun("axis", "axes")
  11. p.defnoun("cactus", "cacti")
  12. p.defnoun("focus", "foci")
  13. p.defnoun("fungus", "fungi")
  14. p.defnoun("radius", "radii")
  15. p.defnoun("nucleus", "nuclei")
  16. p.defnoun("synopsis", "synopses")
  17. p.defnoun("crisis", "crises")
  18. p.defnoun("analysis", "analyses")
  19. p.defnoun("diagnosis", "diagnoses")
  20. p.defnoun("phenomenon", "phenomena")
  21. p.defnoun("criterion", "criteria")
  22. p.defnoun("matrix", "matrices")
  23. p.defnoun("die", "dies")
  24. # 用户自定义的单数单词词表
  25. USER_DEFINED_SINGULAR_WORDS = [singular_word.lower() for singular_word in p.pl_sb_user_defined[::2]]
  26. def singular(word: str):
  27. """
  28. 将复数名词转换为单数形式。
  29. :param word: 需要转换的名词, eg:"COMPONENTS"
  30. :return: 单数形式的名词, "COMPONENT"
  31. """
  32. if word is None or word.strip() == '':
  33. return word
  34. try:
  35. word_l = word.lower()
  36. # 用户自定义的单数单词列表
  37. if word_l in USER_DEFINED_SINGULAR_WORDS:
  38. return word
  39. # ss结尾, 's结尾
  40. if word_l.endswith("ss") or word_l.endswith("'s"):
  41. return word
  42. # 单词长度小于3
  43. if len(word) <= 3 and word_l not in ["men"]:
  44. return word
  45. singular_form = p.singular_noun(word)
  46. if singular_form is False:
  47. # 如果word本身就是单数形式,则直接返回原字符串
  48. return word
  49. return singular_form
  50. except Exception as _:
  51. return word
  52. def phrase_singular(phrase: str):
  53. """
  54. 将词组的最后一个单词复数转单数
  55. :param phrase: eg:"GEARBOX COMPONENTS"
  56. :return: "GEARBOX COMPONENT"
  57. """
  58. if phrase is None or phrase == '':
  59. return None
  60. words = phrase.split()
  61. if len(words) > 1:
  62. tmp = words[0: -1]
  63. tmp.append(singular(words[-1]))
  64. return " ".join(tmp)
  65. else:
  66. return singular(phrase)