clickhouse_ddl_export/clickhouse_ddl_export.py

#!/usr/bin/env python3
# install:
# pip3 install clickhouse-connect
import argparse
import re
import json

import clickhouse_connect

DUMPFILE = "/tmp/ch_export"

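# The regex below splits apart the CREATE statements returned by system.tables
# so that a UUID clause can be spliced back in. A sketch of the intended
# rewrite, with a hypothetical table name and UUID:
#   in:  CREATE TABLE db1.t1 (`id` UInt64) ENGINE = ReplicatedMergeTree(...) ...
#   out: CREATE TABLE db1.t1 UUID '0189...' (`id` UInt64) ENGINE = ReplicatedMergeTree(...) ...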
createre = re.compile(r"(?P<begin>CREATE (TABLE|(MATERIALIZED )?VIEW)) (?P<database>\w+)\.(?P<table>\w+) (?P<query>.*)")

database_exclude = [
    "default",
    "INFORMATION_SCHEMA",
    "information_schema",
    "system",
]


def get_databases_tables(client):
    databases = {}
    db_exclude_join = ["'{}'".format(i) for i in database_exclude]
    q_db_exclude = "({})".format(",".join(db_exclude_join))
    q_db = client.query(f"SELECT database,toString(uuid) FROM system.databases WHERE database NOT IN {q_db_exclude};")

    for d in q_db.result_rows:
        dbname = d[0]
        dbuuid = d[1]
        databases[dbname] = {"ddl": f"CREATE DATABASE {dbname} UUID '{dbuuid}'", "tables": {}}
        q_tables = client.query(f"show tables from {dbname};")
        for t in q_tables.result_rows:
            tablename = t[0]
            q_table_schema = client.query(f"SELECT create_table_query, uuid from system.tables where database='{dbname}' and table='{tablename}';")
            for schema in q_table_schema.result_rows:
                create_table_query = schema[0]
                uuid = schema[1]
                resre = createre.match(create_table_query)
                if resre is None:
                    # Skip objects whose DDL the regex does not cover.
                    continue
                begin = resre.group("begin")
                database = resre.group("database")
                table = resre.group("table")
                query = resre.group("query")
                # Re-insert the original UUID into the CREATE statement.
                fullquery = f"{begin} {database}.{table} UUID '{uuid}' {query}"
                databases[dbname]["tables"][tablename] = fullquery
    return databases
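
# Shape of the structure returned above and written to the dump file
# (a sketch with hypothetical names and UUIDs):
# {
#     "db1": {
#         "ddl": "CREATE DATABASE db1 UUID '...'",
#         "tables": {
#             "t1": "CREATE TABLE db1.t1 UUID '...' (...) ENGINE = ..."
#         }
#     }
# }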


def export(dumpfile=DUMPFILE):
    client = clickhouse_connect.get_client(host='localhost', username='default', password='')
    databases = get_databases_tables(client)
print(f"exporting to {dumpfile}")
    with open(dumpfile, "w") as f:
        json.dump(databases, f, indent=4)
    return


def dump(dumpfile=DUMPFILE):
    try:
        with open(dumpfile, "r") as f:
            a = json.load(f)

        for database, values in a.items():
print(f"--- database {database} ---")
            create_database = values["ddl"]
            print(f"{create_database};\n")
            for table, create_table in values["tables"].items():
print(f"-- table {database}.{table} --")
print(f"{create_table};\n")
    except FileNotFoundError as e:
        print(e)
print("-- to re-create replicas, run 'clickhouse-client --multiquery < generated_file.sql' on new replica server")


def main():
    parser = argparse.ArgumentParser(
        prog="clickhouse_ddl_export",
        description="Exports clickhouse DDL, useful when adding replicas in a cluster")
    parser.add_argument("dumpfile", default=DUMPFILE, nargs="?")
    group = parser.add_mutually_exclusive_group()
    group.add_argument("--export", action="store_true")
    group.add_argument("--print-statements", action="store_true")
    args = parser.parse_args()
    if args.export:
        export(args.dumpfile)
    elif args.print_statements:
        dump(args.dumpfile)
    else:
        parser.print_help()


if __name__ == "__main__":
    main()
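
# Example invocations (a sketch; assumes clickhouse-connect is installed and a
# local ClickHouse server is reachable with the default user):
#   ./clickhouse_ddl_export.py --export /tmp/ch_export
#   ./clickhouse_ddl_export.py --print-statements /tmp/ch_export > replica_ddl.sql
# The generated SQL can then be replayed on the new replica with:
#   clickhouse-client --multiquery < replica_ddl.sql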