业务场景:本地数据库(Oracle)有200张表需要同步到odps,需要先在odps建表,再进行表数据同步。如果一张张的去建表,很浪费时间。如果后续还有这种业务,则考虑用Python写脚本,读取本地数据库这些表的表结构,在odps上进行批量建表,字段类型统一用string。
import cx_Oracle from odps import ODPS, options def create_table(table_name, table_comments, owner): #执行查询表结构语句,获取表结构信息 col_sql = '''select COLUMN_NAME,COMMENTS from all_col_comments where table_name = '{}' and owner = '{}' '''.format( table_name, owner) #print(col_sql) #打印SQL col_data = cursor.execute(col_sql).fetchall() # print(type(col_data)) #list readcolumnlist = [] #存放字段 commentslist = [] #存放字段注释 for col in col_data: col_name = col[0] # 字段 comments = col[1] # 字段注释 readcolumnlist.append(col_name) commentslist.append(comments) # print(comments) # print(readcolumnlist) # print(commentslist) create_table_sql = "CREATE TABLE IF NOT EXISTS 项目空间名称." + table_name + "(n" column_comment_sql = "" #生成建表语句 for idx in range(len(readcolumnlist)): if idx != len(readcolumnlist) - 1: create_table_sql += readcolumnlist[idx] + " STRING COMMENT '" + (commentslist[idx] if commentslist[idx] else "") + "'," + 'n' else: create_table_sql += readcolumnlist[idx] + " STRING COMMENT '" + (commentslist[idx] if commentslist[idx] else "") + "'" + 'n' create_table_sql += ")" + "nCOMMENT '{}';n".format(table_comments) print(create_table_sql) odps.execute_sql(create_table_sql) #在odps建表 if __name__ == '__main__': accessId = "填写用户id" accessKey = "填写用户key" project = "填写项目空间名称" endpoint = "填写odps访问地址" odps = ODPS(accessId, accessKey, project, endpoint=endpoint) ''' 本地哪些库的表要同步到odps,就把库名OWNER ,表名TABLE_NAME, 表备注COMMENTS 等信息存到一张表里TABLE_DATA。 ''' select_sql = "SELECT TABLE_NAME,COMMENTS ,OWNER FROM DMP.TABLE_DATA" db_conn = cx_Oracle.connect("user", "password", "10.0.0.0:15215/orcl") cursor = db_conn.cursor() cursor.execute(select_sql) results = cursor.fetchall() for result in results: table_name = result[0] # 表名 table_comments = result[1] #表备注 owner = result[2] #用户,即表在哪个数据库 # print(table_name) #str类型 # print(table_comments) #str类型 create_table(table_name, table_comments, owner)#调用建表方法