# diff_utils.py (4.2 KB)

  1. import json
  2. import re
  3. import os
  4. def ensure_json_values(bson_str):
  5. def replace_special_types(match):
  6. prefix = match.group(1)
  7. key = match.group(2)
  8. value = match.group(3).replace('"', "")
  9. suffix = match.group(4)
  10. comma_or_newline = match.group(5)
  11. return f'{prefix}{key}{value}{suffix}{comma_or_newline}'
  12. patterns = {
  13. 'normal': re.compile(r'([ \t]*)(\"[^\"]*\"\s*:\s*)([^\",\{\}\[\]\s].*?)([,\n])'),
  14. 'special_types': re.compile(
  15. r'([ \t]*)(\"(?:ObjectId|ISODate|NumberInt|NumberLong|NumberDecimal|Binary|Boolean|Timestamp|RegExp|DBRef|JavaScript code|Symbol|MinKey|MaxKey)\()\"([^\"\)]*)\"(\).+?)([,\n])')
  16. }
  17. # 使用正则表达式替换非特殊类型的值
  18. bson_str = re.sub(patterns['normal'],
  19. lambda match: f'{match.group(1)}{match.group(2)}"{match.group(3)}"{match.group(4)}', bson_str)
  20. # 使用定义的函数替换特殊类型的值
  21. bson_str = re.sub(patterns['special_types'], replace_special_types, bson_str)
  22. return bson_str
  23. def analyze_json_objects(file_path):
  24. json_objects = []
  25. with open(file_path, 'r') as file:
  26. content = ensure_json_values(file.read())
  27. json_strings = re.split(r'\n\s*\n', content.strip())
  28. for js in json_strings:
  29. if js:
  30. try:
  31. json_obj = json.loads(js)
  32. json_objects.append(json_obj)
  33. except json.JSONDecodeError:
  34. print(f'Error decoding JSON from string: {js}')
  35. id_to_json_map = {}
  36. for jsonObj in json_objects:
  37. if isinstance(jsonObj, dict):
  38. json_id = jsonObj.get('_id', None)
  39. if json_id is not None:
  40. if json_id in id_to_json_map:
  41. id_to_json_map[json_id].append(jsonObj)
  42. else:
  43. id_to_json_map[json_id] = [jsonObj]
  44. return id_to_json_map
  45. def compare_json_objects(id_to_json_map):
  46. output_lines = []
  47. for json_id, json_list in id_to_json_map.items():
  48. if len(json_list) != 2:
  49. continue
  50. first, second = json_list
  51. first_set = set(first.keys())
  52. second_set = set(second.keys())
  53. intersect_keys = first_set & second_set
  54. unique_first = first_set - second_set
  55. unique_second = second_set - first_set
  56. output_lines.append(f"比较的MongoId: {json_id}")
  57. if unique_first:
  58. output_lines.append("新表中独有的:")
  59. for key in unique_first:
  60. output_lines.append(f" {key}: {first.get(key)}")
  61. if unique_second:
  62. output_lines.append("旧表中独有的:")
  63. for key in unique_second:
  64. output_lines.append(f" {key}: {second.get(key)}")
  65. differing_values = []
  66. for key in intersect_keys:
  67. if first.get(key) != second.get(key):
  68. differing_values.append(f" {key}: {first.get(key)} vs {second.get(key)}")
  69. if differing_values:
  70. output_lines.append("两表value不同的:")
  71. output_lines.extend(differing_values)
  72. output_lines.append("")
  73. current_path = os.getcwd()
  74. target_folder = 'tendata-warehouse'
  75. base_path = get_base_path(current_path, target_folder)
  76. # 可以指定自己的输出路径
  77. file_path = os.path.join(base_path, "workspace", "output.txt")
  78. with open(file_path, 'w') as output_file:
  79. output_file.write('\n'.join(output_lines))
  80. print("可在服务器查看{}".format(file_path))
  81. def get_base_path(current_path, target_folder):
  82. path_parts = current_path.split(os.sep)
  83. target_index = path_parts.index(target_folder)
  84. base_path = os.sep.join(path_parts[:target_index + 1])
  85. return base_path
  86. def main():
  87. current_path = os.getcwd()
  88. target_folder = 'tendata-warehouse'
  89. base_path = get_base_path(current_path, target_folder)
  90. relative_path = input("请输入比较文件的相对路径 Path From Content Root: ")
  91. file_path = os.path.join(base_path, relative_path)
  92. id_to_json_map = analyze_json_objects(file_path)
  93. compare_json_objects(id_to_json_map)
# Run the interactive comparison only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()