From 1f7f8489e5387021a3ca0d948a7caf6c930207fd Mon Sep 17 00:00:00 2001 From: Egor Medvedev Date: Thu, 12 Sep 2024 13:52:08 +0100 Subject: [PATCH] Wait replication-sync returns valid exit-code while CH connection error --- ch_tools/chadmin/cli/wait_group.py | 31 ++++++++++++++++-------------- tests/features/chadmin.feature | 18 ++++++++++++++++- 2 files changed, 34 insertions(+), 15 deletions(-) diff --git a/ch_tools/chadmin/cli/wait_group.py b/ch_tools/chadmin/cli/wait_group.py index 5691446d..1c8ea053 100644 --- a/ch_tools/chadmin/cli/wait_group.py +++ b/ch_tools/chadmin/cli/wait_group.py @@ -2,8 +2,8 @@ import sys import time +import requests from click import FloatRange, group, option, pass_context -from requests.exceptions import ReadTimeout from ch_tools.chadmin.cli.chadmin_group import Chadmin from ch_tools.chadmin.internal.clickhouse_disks import S3_METADATA_STORE_PATH @@ -98,13 +98,13 @@ def wait_replication_sync_command( start_time = time.time() deadline = start_time + total_timeout.total_seconds() - # Sync tables in cycle - for replica in list_table_replicas(ctx): - full_name = f"`{replica['database']}`.`{replica['table']}`" - time_left = deadline - time.time() - timeout = min(replica_timeout.total_seconds(), time_left) + try: + # Sync tables in cycle + for replica in list_table_replicas(ctx): + full_name = f"`{replica['database']}`.`{replica['table']}`" + time_left = deadline - time.time() + timeout = min(replica_timeout.total_seconds(), time_left) - try: execute_query( ctx, f"SYSTEM SYNC REPLICA {full_name}", @@ -112,14 +112,17 @@ def wait_replication_sync_command( timeout=timeout, settings={"receive_timeout": timeout}, ) - except ReadTimeout: - logging.error("Timeout while running SYNC REPLICA on {}.", full_name) + except requests.exceptions.ReadTimeout: + logging.error("Read timeout while running query.") + sys.exit(1) + except requests.exceptions.ConnectionError: + logging.error("Connection error while running query.") + sys.exit(1) + except ClickhouseError as e: + if "TIMEOUT_EXCEEDED" in str(e): + logging.error("Timeout while running query.") sys.exit(1) - except ClickhouseError as e: - if "TIMEOUT_EXCEEDED" in str(e): - logging.error("Timeout while running SYNC REPLICA on {}.", full_name) - sys.exit(1) - raise + raise # Replication lag while time.time() < deadline: diff --git a/tests/features/chadmin.feature b/tests/features/chadmin.feature index be762541..b571c5a3 100644 --- a/tests/features/chadmin.feature +++ b/tests/features/chadmin.feature @@ -145,7 +145,7 @@ Feature: chadmin commands. """ Then it fails with response contains """ - Timeout while running SYNC REPLICA on + Read timeout while running query. """ When we execute query on clickhouse01 """ @@ -153,6 +153,22 @@ Feature: chadmin commands. """ When we execute command on clickhouse01 """ + supervisorctl stop clickhouse-server + """ + When we try to execute command on clickhouse01 + """ + chadmin wait replication-sync --total-timeout 10 --replica-timeout 3 -p 1 -w 4 + """ + Then it fails with response contains + """ + Connection error while running query. + """ + When we execute command on clickhouse01 + """ + supervisorctl start clickhouse-server + """ + When we execute command on clickhouse01 + """ chadmin wait replication-sync --total-timeout 10 --replica-timeout 3 -p 1 -w 4 """