ソースを参照

feat(bin): datax-sync-template-gen 加 --schema-md(输出 PG 全字段 metadata markdown 表)

tianyu.chu 1 週間 前
コミット
d699134ae2
2 ファイル変更、80 行追加、5 行削除
  1. 53 5
      bin/datax-sync-template-gen.py
  2. 27 0
      tests/unit/datax/test_sync_template_gen.py

+ 53 - 5
bin/datax-sync-template-gen.py

@@ -99,6 +99,45 @@ def query_primary_key(conn, schema, table):
     return ''
 
 
+def query_columns_full(conn, schema, table):
+    """Fetch per-column metadata for one table, ordered by attnum.
+
+    Args:
+        conn: connection object exposing ``cursor()``; the query uses ``%s``
+            placeholders for its two parameters.
+        schema: namespace (schema) name the table lives in.
+        table: relation name inside that schema.
+
+    Returns:
+        A list of ``(attnum, attname, comment, pg_type, pk_flag)`` rows.
+        ``comment`` is whatever ``col_description`` yields for the column,
+        and ``pk_flag`` is ``'PK'`` for columns that are part of the primary
+        key index, ``''`` otherwise. The list is empty when nothing matches
+        the schema/table pair.
+    """
+    # System columns (attnum <= 0) and dropped columns are filtered out by
+    # the WHERE clause; every catalog relation is schema-qualified.
+    sql = """
+        SELECT
+            a.attnum,
+            a.attname,
+            pg_catalog.col_description(a.attrelid, a.attnum),
+            pg_catalog.format_type(a.atttypid, a.atttypmod),
+            CASE WHEN EXISTS (
+                SELECT 1 FROM pg_catalog.pg_index i
+                WHERE i.indrelid = a.attrelid AND i.indisprimary
+                  AND a.attnum = ANY(i.indkey)
+            ) THEN 'PK' ELSE '' END
+        FROM pg_catalog.pg_attribute a
+        JOIN pg_catalog.pg_class c ON a.attrelid = c.oid
+        JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid
+        WHERE n.nspname = %s AND c.relname = %s
+          AND a.attnum > 0 AND NOT a.attisdropped
+        ORDER BY a.attnum
+    """
+    cur = conn.cursor()
+    try:
+        cur.execute(sql, (schema, table))
+        return cur.fetchall()
+    finally:
+        # Release the cursor even when execute/fetch raises.
+        cur.close()
+
+
+def render_schema_md(rows):
+    """Render column metadata as a markdown table.
+
+    Args:
+        rows: iterable of ``(attnum, attname, comment, pg_type, pk_flag)``
+            tuples; ``comment`` may be ``None``.
+
+    Returns:
+        The table as one string ending in a newline. Columns are: ordinal,
+        field name (in backticks), column comment, data type, primary-key
+        flag, and a final column that is always left empty for the
+        developer to fill in.
+    """
+    lines = [
+        '| 序号 | 字段名 | 中文名 | 数据类型 | 主键标识 | 裁剪类型 |',
+        '| --- | --- | --- | --- | --- | --- |',
+    ]
+    for num, name, comment, typ, pk in rows:
+        # A raw '|' or a line break inside a comment would split the row
+        # into extra cells or extra lines, so collapse the comment onto one
+        # line and escape pipes before formatting.
+        cell = ' '.join((comment or '').splitlines()).replace('|', '\\|')
+        lines.append('| {} | `{}` | {} | {} | {} |  |'.format(
+            num, name, cell, typ, pk))
+    return '\n'.join(lines) + '\n'
+
+
 def render_template(ds_ref, database, schema, table, columns, pk):
     column_str = ','.join(c for c, _ in columns)
     today = datetime.now().strftime('%Y-%m-%d')
@@ -151,6 +190,8 @@ def main():
                         help='schema 限定的表名(如 public.card_group_order_info)')
     parser.add_argument('-o', nargs='?', const=WORKSPACE_DEFAULT, default=None, metavar='DIR',
                         help='输出目录(不传 stdout;不带值 workspace/{yyyymmdd}/;带值自定义)')
+    parser.add_argument('--schema-md', action='store_true',
+                        help='改为输出 PG 全字段 metadata markdown 表(序号/字段名/中文名/数据类型/主键标识/裁剪类型空列),用于 kb/24 raw 建模文档')
     args = parser.parse_args()
 
     if '.' not in args.t:
@@ -171,18 +212,25 @@ def main():
         user=user, password=password,
     )
     try:
-        columns = query_columns(conn, schema, table)
-        pk = query_primary_key(conn, schema, table)
+        if args.schema_md:
+            rows = query_columns_full(conn, schema, table)
+            if not rows:
+                raise ValueError('表不存在或无字段: {}.{}'.format(schema, table))
+            content = render_schema_md(rows)
+            out_suffix = '.md'
+        else:
+            columns = query_columns(conn, schema, table)
+            pk = query_primary_key(conn, schema, table)
+            content = render_template(args.ds, database, schema, table, columns, pk)
+            out_suffix = '.ini'
     finally:
         conn.close()
 
-    content = render_template(args.ds, database, schema, table, columns, pk)
-
     if args.o is None:
         sys.stdout.write(content)
     else:
         os.makedirs(args.o, exist_ok=True)
-        out_path = os.path.join(args.o, table + '.ini')
+        out_path = os.path.join(args.o, table + out_suffix)
         with open(out_path, 'w', encoding='utf-8') as f:
             f.write(content)
         print('已写入: ' + out_path, file=sys.stderr)

+ 27 - 0
tests/unit/datax/test_sync_template_gen.py

@@ -107,6 +107,33 @@ def test_render_template_includes_required_fields():
     assert 'fileName = users_TODO_d' in out
 
 
+def test_query_columns_full_returns_full_metadata():
+    """query_columns_full binds (schema, table) and returns the fetched rows as-is."""
+    conn = MagicMock()
+    cur = conn.cursor.return_value
+    expected = [
+        (1, 'id', 'id', 'bigint', 'PK'),
+        (2, 'name', '名称', 'character varying', ''),
+    ]
+    cur.fetchall.return_value = list(expected)
+    rows = GEN.query_columns_full(conn, 'public', 'orders')
+    assert rows == expected
+    # Comparing rows alone only echoes the mock back; also make sure the
+    # requested schema/table are really bound as the query parameters.
+    cur.execute.assert_called_once()
+    assert cur.execute.call_args[0][1] == ('public', 'orders')
+
+
+def test_render_schema_md_table_format():
+    """The rendered table contains the header row and one row per column."""
+    rendered = GEN.render_schema_md([
+        (1, 'id', 'id', 'bigint', 'PK'),
+        (2, 'user_name', '用户名', 'character varying', ''),
+        (3, 'create_time', None, 'timestamp without time zone', ''),  # no comment
+    ])
+    expected_fragments = (
+        '| 序号 | 字段名 | 中文名 | 数据类型 | 主键标识 | 裁剪类型 |',
+        '| 1 | `id` | id | bigint | PK |  |',
+        '| 2 | `user_name` | 用户名 | character varying |  |  |',
+        '| 3 | `create_time` |  | timestamp without time zone |  |  |',
+    )
+    for fragment in expected_fragments:
+        assert fragment in rendered
+
+
 def test_render_template_empty_pk():
     out = GEN.render_template(
         ds_ref='postgresql/prod-hobby', database='db', schema='public',