mysql_pool.py 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562
  1. # -*- coding: utf-8 -*-
  2. # Author : Charley
  3. # Python : 3.10.8
  4. # Date : 2025/3/25 14:14
  5. import re
  6. import pymysql
  7. import YamlLoader
  8. from loguru import logger
  9. from dbutils.pooled_db import PooledDB
# Load the project's YAML configuration and pull out the MySQL connection
# settings. These module-level values are read by MySQLConnectionPool.__init__
# when it builds the pool.
# NOTE(review): this runs at import time, so importing the module presumably
# requires the YAML configuration to be available — confirm against YamlLoader.
yaml = YamlLoader.readYaml()
mysqlYaml = yaml.get("mysql")
sql_host = mysqlYaml.getValueAsString("host")
sql_port = mysqlYaml.getValueAsInt("port")
sql_user = mysqlYaml.getValueAsString("username")
sql_password = mysqlYaml.getValueAsString("password")
sql_db = mysqlYaml.getValueAsString("db")
  18. class MySQLConnectionPool:
  19. """
  20. MySQL连接池
  21. """
  22. def __init__(self, mincached=4, maxcached=5, maxconnections=10, log=None):
  23. """
  24. 初始化连接池
  25. :param mincached: 初始化时,链接池中至少创建的链接,0表示不创建
  26. :param maxcached: 池中空闲连接的最大数目(0 或 None 表示池大小不受限制)
  27. :param maxconnections: 允许的最大连接数(0 或 None 表示任意数量的连接)
  28. :param log: 自定义日志记录器
  29. """
  30. # 使用 loguru 的 logger,如果传入了其他 logger,则使用传入的 logger
  31. self.log = log or logger
  32. self.pool = PooledDB(
  33. creator=pymysql,
  34. mincached=mincached,
  35. maxcached=maxcached,
  36. maxconnections=maxconnections,
  37. blocking=True, # 连接池中如果没有可用连接后,是否阻塞等待。True,等待;False,不等待然后报错
  38. host=sql_host,
  39. port=sql_port,
  40. user=sql_user,
  41. password=sql_password,
  42. database=sql_db,
  43. ping=0 # 每次连接使用时自动检查有效性(0=不检查,1=执行query前检查,2=每次执行前检查)
  44. )
  45. def _execute(self, query, args=None, commit=False):
  46. """
  47. 执行SQL
  48. :param query: SQL语句
  49. :param args: SQL参数
  50. :param commit: 是否提交事务
  51. :return: 查询结果
  52. """
  53. try:
  54. with self.pool.connection() as conn:
  55. with conn.cursor() as cursor:
  56. cursor.execute(query, args)
  57. if commit:
  58. conn.commit()
  59. self.log.debug(f"sql _execute, Query: {query}, Rows: {cursor.rowcount}")
  60. return cursor
  61. except Exception as e:
  62. if commit:
  63. conn.rollback()
  64. self.log.error(f"Error executing query: {e}, Query: {query}, Args: {args}")
  65. raise e
  66. def select_one(self, query, args=None):
  67. """
  68. 执行查询,返回单个结果
  69. :param query: 查询语句
  70. :param args: 查询参数
  71. :return: 查询结果
  72. """
  73. cursor = self._execute(query, args)
  74. return cursor.fetchone()
  75. def select_all(self, query, args=None):
  76. """
  77. 执行查询,返回所有结果
  78. :param query: 查询语句
  79. :param args: 查询参数
  80. :return: 查询结果
  81. """
  82. cursor = self._execute(query, args)
  83. return cursor.fetchall()
  84. def insert_one(self, query, args):
  85. """
  86. 执行单条插入语句
  87. :param query: 插入语句
  88. :param args: 插入参数
  89. """
  90. self.log.info('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>data insert_one 入库中>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
  91. cursor = self._execute(query, args, commit=True)
  92. return cursor.lastrowid # 返回插入的ID
  93. def insert_all(self, query, args_list):
  94. """
  95. 执行批量插入语句,如果失败则逐条插入
  96. :param query: 插入语句
  97. :param args_list: 插入参数列表
  98. """
  99. conn = None
  100. cursor = None
  101. try:
  102. conn = self.pool.connection()
  103. cursor = conn.cursor()
  104. cursor.executemany(query, args_list)
  105. conn.commit()
  106. self.log.debug(f"sql insert_all, SQL: {query}, Rows: {len(args_list)}")
  107. self.log.info('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>data insert_all 入库中>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
  108. except Exception as e:
  109. conn.rollback()
  110. self.log.error(f"Batch insertion failed after 5 attempts. Trying single inserts. Error: {e}")
  111. # 如果批量插入失败,则逐条插入
  112. rowcount = 0
  113. for args in args_list:
  114. self.insert_one(query, args)
  115. rowcount += 1
  116. self.log.debug(f"Batch insertion failed. Inserted {rowcount} rows individually.")
  117. finally:
  118. if cursor:
  119. cursor.close()
  120. if conn:
  121. conn.close()
  122. def insert_one_or_dict(self, table=None, data=None, query=None, args=None, commit=True, ignore=False):
  123. """
  124. 单条插入(支持字典或原始SQL)
  125. :param table: 表名(字典插入时必需)
  126. :param data: 字典数据 {列名: 值}
  127. :param query: 直接SQL语句(与data二选一)
  128. :param args: SQL参数(query使用时必需)
  129. :param commit: 是否自动提交
  130. :param ignore: 是否使用ignore
  131. :return: 最后插入ID
  132. """
  133. if data is not None:
  134. if not isinstance(data, dict):
  135. raise ValueError("Data must be a dictionary")
  136. keys = ', '.join([self._safe_identifier(k) for k in data.keys()])
  137. values = ', '.join(['%s'] * len(data))
  138. # query = f"INSERT INTO {self._safe_identifier(table)} ({keys}) VALUES ({values})"
  139. # 构建 INSERT IGNORE 语句
  140. ignore_clause = "IGNORE" if ignore else ""
  141. # insert_sql = f"INSERT {ignore_clause} INTO {table} ({columns}) VALUES ({placeholders})"
  142. query = f"INSERT {ignore_clause} INTO {self._safe_identifier(table)} ({keys}) VALUES ({values})"
  143. args = tuple(data.values())
  144. elif query is None:
  145. raise ValueError("Either data or query must be provided")
  146. # cursor = self._execute(query, args, commit)
  147. # self.log.info(f"sql insert_one_or_dict, Table: {table}, Rows: {cursor.rowcount}")
  148. # self.log.info('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>data insert_one_or_dict 入库中>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
  149. # return cursor.lastrowid
  150. try:
  151. cursor = self._execute(query, args, commit)
  152. self.log.info(f"sql insert_one_or_dict, Table: {table}, Rows: {cursor.rowcount}")
  153. self.log.info('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>data insert_one_or_dict 入库中>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
  154. return cursor.lastrowid
  155. except pymysql.err.IntegrityError as e:
  156. if "Duplicate entry" in str(e):
  157. self.log.warning(f"插入失败:重复条目,已跳过。错误详情: {e}")
  158. # print("插入失败:重复条目", e)
  159. return -1 # 返回 -1 表示重复条目被跳过
  160. else:
  161. self.log.error(f"数据库完整性错误: {e}")
  162. # print("插入失败:完整性错误", e)
  163. raise e
  164. except Exception as e:
  165. self.log.error(f"未知错误: {e}", exc_info=True)
  166. # print("插入失败:未知错误", e)
  167. raise e
  168. def insert_many(self, table=None, data_list=None, query=None, args_list=None, batch_size=1000, commit=True, ignore=False):
  169. """
  170. 批量插入(支持字典列表或原始SQL)
  171. :param table: 表名(字典插入时必需)
  172. :param data_list: 字典列表 [{列名: 值}]
  173. :param query: 直接SQL语句(与data_list二选一)
  174. :param args_list: SQL参数列表(query使用时必需)
  175. :param batch_size: 分批大小
  176. :param commit: 是否自动提交
  177. :param ignore: 是否使用ignore
  178. :return: 影响行数
  179. """
  180. if data_list is not None:
  181. if not data_list or not isinstance(data_list[0], dict):
  182. raise ValueError("Data_list must be a non-empty list of dictionaries")
  183. keys = ', '.join([self._safe_identifier(k) for k in data_list[0].keys()])
  184. values = ', '.join(['%s'] * len(data_list[0]))
  185. # 构建 INSERT IGNORE 语句
  186. ignore_clause = "IGNORE" if ignore else ""
  187. # insert_sql = f"INSERT {ignore_clause} INTO {table} ({columns}) VALUES ({placeholders})"
  188. query = f"INSERT {ignore_clause} INTO {self._safe_identifier(table)} ({keys}) VALUES ({values})"
  189. args_list = [tuple(d.values()) for d in data_list]
  190. elif query is None:
  191. raise ValueError("Either data_list or query must be provided")
  192. total = 0
  193. for i in range(0, len(args_list), batch_size):
  194. batch = args_list[i:i + batch_size]
  195. try:
  196. with self.pool.connection() as conn:
  197. with conn.cursor() as cursor:
  198. cursor.executemany(query, batch)
  199. if commit:
  200. conn.commit()
  201. total += cursor.rowcount
  202. except pymysql.Error as e:
  203. if "Duplicate entry" in str(e):
  204. # self.log.warning(f"检测到重复条目,开始逐条插入。错误详情: {e}")
  205. raise e
  206. # rowcount = 0
  207. # for args in batch:
  208. # try:
  209. # self.insert_one_or_dict(table=table, data=dict(zip(data_list[0].keys(), args)),
  210. # commit=commit)
  211. # rowcount += 1
  212. # except pymysql.err.IntegrityError as e2:
  213. # if "Duplicate entry" in str(e2):
  214. # self.log.warning(f"跳过重复条目: {args}")
  215. # else:
  216. # self.log.error(f"插入失败: {e2}, 参数: {args}")
  217. # total += rowcount
  218. else:
  219. self.log.error(f"数据库错误: {e}")
  220. if commit:
  221. conn.rollback()
  222. raise e
  223. # 重新抛出异常,供外部捕获
  224. # 降级为单条插入
  225. # for args in batch:
  226. # try:
  227. # self.insert_one_or_dict(table=None, query=query, args=args, commit=commit)
  228. # total += 1
  229. # except Exception as e2:
  230. # self.log.error(f"Single insert failed: {e2}")
  231. # continue
  232. self.log.info(f"sql insert_many, Table: {table}, Total Rows: {total}")
  233. return total
  234. def insert_many_two(self, table=None, data_list=None, query=None, args_list=None, batch_size=1000, commit=True):
  235. """
  236. 批量插入(支持字典列表或原始SQL)
  237. :param table: 表名(字典插入时必需)
  238. :param data_list: 字典列表 [{列名: 值}]
  239. :param query: 直接SQL语句(与data_list二选一)
  240. :param args_list: SQL参数列表(query使用时必需)
  241. :param batch_size: 分批大小
  242. :param commit: 是否自动提交
  243. :return: 影响行数
  244. """
  245. if data_list is not None:
  246. if not data_list or not isinstance(data_list[0], dict):
  247. raise ValueError("Data_list must be a non-empty list of dictionaries")
  248. keys = ', '.join([self._safe_identifier(k) for k in data_list[0].keys()])
  249. values = ', '.join(['%s'] * len(data_list[0]))
  250. query = f"INSERT INTO {self._safe_identifier(table)} ({keys}) VALUES ({values})"
  251. args_list = [tuple(d.values()) for d in data_list]
  252. elif query is None:
  253. raise ValueError("Either data_list or query must be provided")
  254. total = 0
  255. for i in range(0, len(args_list), batch_size):
  256. batch = args_list[i:i + batch_size]
  257. try:
  258. with self.pool.connection() as conn:
  259. with conn.cursor() as cursor:
  260. # 添加调试日志:输出 SQL 和参数示例
  261. # self.log.debug(f"Batch insert SQL: {query}")
  262. # self.log.debug(f"Sample args: {batch[0] if batch else 'None'}")
  263. cursor.executemany(query, batch)
  264. if commit:
  265. conn.commit()
  266. total += cursor.rowcount
  267. # self.log.debug(f"Batch insert succeeded. Rows: {cursor.rowcount}")
  268. except Exception as e: # 明确捕获数据库异常
  269. self.log.exception(f"Batch insert failed: {e}") # 使用 exception 记录堆栈
  270. self.log.error(f"Failed SQL: {query}, Args count: {len(batch)}")
  271. if commit:
  272. conn.rollback()
  273. # 降级为单条插入,并记录每个错误
  274. rowcount = 0
  275. for args in batch:
  276. try:
  277. self.insert_one(query, args)
  278. rowcount += 1
  279. except Exception as e2:
  280. self.log.error(f"Single insert failed: {e2}, Args: {args}")
  281. total += rowcount
  282. self.log.debug(f"Inserted {rowcount}/{len(batch)} rows individually.")
  283. self.log.info(f"sql insert_many, Table: {table}, Total Rows: {total}")
  284. return total
  285. def insert_too_many(self, query, args_list, batch_size=1000):
  286. """
  287. 执行批量插入语句,分片提交, 单次插入大于十万+时可用, 如果失败则降级为逐条插入
  288. :param query: 插入语句
  289. :param args_list: 插入参数列表
  290. :param batch_size: 每次插入的条数
  291. """
  292. for i in range(0, len(args_list), batch_size):
  293. batch = args_list[i:i + batch_size]
  294. try:
  295. with self.pool.connection() as conn:
  296. with conn.cursor() as cursor:
  297. cursor.executemany(query, batch)
  298. conn.commit()
  299. except Exception as e:
  300. self.log.error(f"insert_too_many error. Trying single insert. Error: {e}")
  301. # 当前批次降级为单条插入
  302. for args in batch:
  303. self.insert_one(query, args)
  304. def update_one(self, query, args):
  305. """
  306. 执行单条更新语句
  307. :param query: 更新语句
  308. :param args: 更新参数
  309. """
  310. self.log.info('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>data update_one 更新中>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
  311. return self._execute(query, args, commit=True)
  312. def update_all(self, query, args_list):
  313. """
  314. 执行批量更新语句,如果失败则逐条更新
  315. :param query: 更新语句
  316. :param args_list: 更新参数列表
  317. """
  318. conn = None
  319. cursor = None
  320. try:
  321. conn = self.pool.connection()
  322. cursor = conn.cursor()
  323. cursor.executemany(query, args_list)
  324. conn.commit()
  325. self.log.debug(f"sql update_all, SQL: {query}, Rows: {len(args_list)}")
  326. self.log.info('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>data update_all 更新中>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
  327. except Exception as e:
  328. conn.rollback()
  329. self.log.error(f"Error executing query: {e}")
  330. # 如果批量更新失败,则逐条更新
  331. rowcount = 0
  332. for args in args_list:
  333. self.update_one(query, args)
  334. rowcount += 1
  335. self.log.debug(f'Batch update failed. Updated {rowcount} rows individually.')
  336. finally:
  337. if cursor:
  338. cursor.close()
  339. if conn:
  340. conn.close()
  341. def update_one_or_dict(self, table=None, data=None, condition=None, query=None, args=None, commit=True):
  342. """
  343. 单条更新(支持字典或原始SQL)
  344. :param table: 表名(字典模式必需)
  345. :param data: 字典数据 {列名: 值}(与 query 二选一)
  346. :param condition: 更新条件,支持以下格式:
  347. - 字典: {"id": 1} → "WHERE id = %s"
  348. - 字符串: "id = 1" → "WHERE id = 1"(需自行确保安全)
  349. - 元组: ("id = %s", [1]) → "WHERE id = %s"(参数化查询)
  350. :param query: 直接SQL语句(与 data 二选一)
  351. :param args: SQL参数(query 模式下必需)
  352. :param commit: 是否自动提交
  353. :return: 影响行数
  354. :raises: ValueError 参数校验失败时抛出
  355. """
  356. # 参数校验
  357. if data is not None:
  358. if not isinstance(data, dict):
  359. raise ValueError("Data must be a dictionary")
  360. if table is None:
  361. raise ValueError("Table name is required for dictionary update")
  362. if condition is None:
  363. raise ValueError("Condition is required for dictionary update")
  364. # 构建 SET 子句
  365. set_clause = ", ".join([f"{self._safe_identifier(k)} = %s" for k in data.keys()])
  366. set_values = list(data.values())
  367. # 解析条件
  368. condition_clause, condition_args = self._parse_condition(condition)
  369. query = f"UPDATE {self._safe_identifier(table)} SET {set_clause} WHERE {condition_clause}"
  370. args = set_values + condition_args
  371. elif query is None:
  372. raise ValueError("Either data or query must be provided")
  373. # 执行更新
  374. cursor = self._execute(query, args, commit)
  375. # self.log.debug(
  376. # f"Updated table={table}, rows={cursor.rowcount}, query={query[:100]}...",
  377. # extra={"table": table, "rows": cursor.rowcount}
  378. # )
  379. return cursor.rowcount
  380. def _parse_condition(self, condition):
  381. """
  382. 解析条件为 (clause, args) 格式
  383. :param condition: 字典/字符串/元组
  384. :return: (str, list) SQL 子句和参数列表
  385. """
  386. if isinstance(condition, dict):
  387. clause = " AND ".join([f"{self._safe_identifier(k)} = %s" for k in condition.keys()])
  388. args = list(condition.values())
  389. elif isinstance(condition, str):
  390. clause = condition # 注意:需调用方确保安全
  391. args = []
  392. elif isinstance(condition, (tuple, list)) and len(condition) == 2:
  393. clause, args = condition[0], condition[1]
  394. if not isinstance(args, (list, tuple)):
  395. args = [args]
  396. else:
  397. raise ValueError("Condition must be dict/str/(clause, args)")
  398. return clause, args
  399. def update_many(self, table=None, data_list=None, condition_list=None, query=None, args_list=None, batch_size=500,
  400. commit=True):
  401. """
  402. 批量更新(支持字典列表或原始SQL)
  403. :param table: 表名(字典插入时必需)
  404. :param data_list: 字典列表 [{列名: 值}]
  405. :param condition_list: 条件列表(必须为字典,与data_list等长)
  406. :param query: 直接SQL语句(与data_list二选一)
  407. :param args_list: SQL参数列表(query使用时必需)
  408. :param batch_size: 分批大小
  409. :param commit: 是否自动提交
  410. :return: 影响行数
  411. """
  412. if data_list is not None:
  413. if not data_list or not isinstance(data_list[0], dict):
  414. raise ValueError("Data_list must be a non-empty list of dictionaries")
  415. if condition_list is None or len(data_list) != len(condition_list):
  416. raise ValueError("Condition_list must be provided and match the length of data_list")
  417. if not all(isinstance(cond, dict) for cond in condition_list):
  418. raise ValueError("All elements in condition_list must be dictionaries")
  419. # 获取第一个数据项和条件项的键
  420. first_data_keys = set(data_list[0].keys())
  421. first_cond_keys = set(condition_list[0].keys())
  422. # 构造基础SQL
  423. set_clause = ', '.join([self._safe_identifier(k) + ' = %s' for k in data_list[0].keys()])
  424. condition_clause = ' AND '.join([self._safe_identifier(k) + ' = %s' for k in condition_list[0].keys()])
  425. base_query = f"UPDATE {self._safe_identifier(table)} SET {set_clause} WHERE {condition_clause}"
  426. total = 0
  427. # 分批次处理
  428. for i in range(0, len(data_list), batch_size):
  429. batch_data = data_list[i:i + batch_size]
  430. batch_conds = condition_list[i:i + batch_size]
  431. batch_args = []
  432. # 检查当前批次的结构是否一致
  433. can_batch = True
  434. for data, cond in zip(batch_data, batch_conds):
  435. data_keys = set(data.keys())
  436. cond_keys = set(cond.keys())
  437. if data_keys != first_data_keys or cond_keys != first_cond_keys:
  438. can_batch = False
  439. break
  440. batch_args.append(tuple(data.values()) + tuple(cond.values()))
  441. if not can_batch:
  442. # 结构不一致,转为单条更新
  443. for data, cond in zip(batch_data, batch_conds):
  444. self.update_one_or_dict(table=table, data=data, condition=cond, commit=commit)
  445. total += 1
  446. continue
  447. # 执行批量更新
  448. try:
  449. with self.pool.connection() as conn:
  450. with conn.cursor() as cursor:
  451. cursor.executemany(base_query, batch_args)
  452. if commit:
  453. conn.commit()
  454. total += cursor.rowcount
  455. self.log.debug(f"Batch update succeeded. Rows: {cursor.rowcount}")
  456. except Exception as e:
  457. if commit:
  458. conn.rollback()
  459. self.log.error(f"Batch update failed: {e}")
  460. # 降级为单条更新
  461. for args, data, cond in zip(batch_args, batch_data, batch_conds):
  462. try:
  463. self._execute(base_query, args, commit=commit)
  464. total += 1
  465. except Exception as e2:
  466. self.log.error(f"Single update failed: {e2}, Data: {data}, Condition: {cond}")
  467. self.log.info(f"Total updated rows: {total}")
  468. return total
  469. elif query is not None:
  470. # 处理原始SQL和参数列表
  471. if args_list is None:
  472. raise ValueError("args_list must be provided when using query")
  473. total = 0
  474. for i in range(0, len(args_list), batch_size):
  475. batch_args = args_list[i:i + batch_size]
  476. try:
  477. with self.pool.connection() as conn:
  478. with conn.cursor() as cursor:
  479. cursor.executemany(query, batch_args)
  480. if commit:
  481. conn.commit()
  482. total += cursor.rowcount
  483. self.log.debug(f"Batch update succeeded. Rows: {cursor.rowcount}")
  484. except Exception as e:
  485. if commit:
  486. conn.rollback()
  487. self.log.error(f"Batch update failed: {e}")
  488. # 降级为单条更新
  489. for args in batch_args:
  490. try:
  491. self._execute(query, args, commit=commit)
  492. total += 1
  493. except Exception as e2:
  494. self.log.error(f"Single update failed: {e2}, Args: {args}")
  495. self.log.info(f"Total updated rows: {total}")
  496. return total
  497. else:
  498. raise ValueError("Either data_list or query must be provided")
  499. def check_pool_health(self):
  500. """
  501. 检查连接池中有效连接数
  502. # 使用示例
  503. # 配置 MySQL 连接池
  504. sql_pool = MySQLConnectionPool(log=log)
  505. if not sql_pool.check_pool_health():
  506. log.error("数据库连接池异常")
  507. raise RuntimeError("数据库连接池异常")
  508. """
  509. try:
  510. with self.pool.connection() as conn:
  511. conn.ping(reconnect=True)
  512. return True
  513. except Exception as e:
  514. self.log.error(f"Connection pool health check failed: {e}")
  515. return False
  516. @staticmethod
  517. def _safe_identifier(name):
  518. """SQL标识符安全校验"""
  519. if not re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', name):
  520. raise ValueError(f"Invalid SQL identifier: {name}")
  521. return name