From 2455a5422a43dcf2a5f057099d3973f6b1e430fe Mon Sep 17 00:00:00 2001 From: NikitaUnisikhin <92248695+NikitaUnisikhin@users.noreply.github.com> Date: Sun, 11 Aug 2024 18:26:00 +0300 Subject: [PATCH] Store state data in zookeeper (#219) * Added store stats in zk * monrun check rename new options --------- Co-authored-by: Nikita Unisikhin --- ch_tools/chadmin/cli/object_storage_group.py | 40 +++++++-- ch_tools/monrun_checks/ch_orphaned_objects.py | 63 ++++++++++++-- tests/features/monrun.feature | 82 +++++++++++++++++-- tests/features/object_storage.feature | 34 +++++++- tests/steps/zookeeper.py | 15 +++- 5 files changed, 210 insertions(+), 24 deletions(-) diff --git a/ch_tools/chadmin/cli/object_storage_group.py b/ch_tools/chadmin/cli/object_storage_group.py index c7e89875..205408aa 100644 --- a/ch_tools/chadmin/cli/object_storage_group.py +++ b/ch_tools/chadmin/cli/object_storage_group.py @@ -6,6 +6,11 @@ from humanfriendly import format_size from ch_tools.chadmin.cli.chadmin_group import Chadmin +from ch_tools.chadmin.internal.zookeeper import ( + check_zk_node, + create_zk_nodes, + update_zk_nodes, +) from ch_tools.common.cli.formatting import print_response from ch_tools.common.cli.parameters import TimeSpanParamType from ch_tools.common.clickhouse.config import get_clickhouse_config @@ -14,8 +19,8 @@ # Use big enough timeout for stream HTTP query STREAM_TIMEOUT = 10 * 60 -STORE_STATE_PATH = "/tmp/object_storage_cleanup_state.json" ORPHANED_OBJECTS_SIZE_FIELD = "orphaned_objects_size" +STATE_LOCAL_PATH = "/tmp/object_storage_cleanup_state.json" @group("object-storage", cls=Chadmin) @@ -101,11 +106,17 @@ def object_storage_group(ctx: Context, disk_name: str) -> None: help=("Use saved object list without traversing object storage again."), ) @option( - "--store-state", - "store_state", + "--store-state-local", + "store_state_local", is_flag=True, help=("Write total size of orphaned objects to log file."), ) +@option( + "--store-state-zk-path", + "store_state_zk_path", + default="", + help=("Zookeeper node path for storage total size of orphaned objects."), +) @pass_context def clean_command( ctx: Context, @@ -117,7 +128,8 @@ def clean_command( dry_run: bool, keep_paths: bool, use_saved_list: bool, - store_state: bool, + store_state_local: bool, + store_state_zk_path: str, ) -> None: """ Clean orphaned S3 objects. @@ -134,13 +146,27 @@ def clean_command( use_saved_list, ) - if store_state: - with open(STORE_STATE_PATH, "w", encoding="utf-8") as file: - json.dump({ORPHANED_OBJECTS_SIZE_FIELD: total_size}, file, indent=4) + if store_state_zk_path: + _store_state_zk_save(ctx, store_state_zk_path, total_size) + + if store_state_local: + _store_state_local_save(ctx, total_size) _print_response(ctx, dry_run, deleted, total_size) +def _store_state_zk_save(ctx: Context, path: str, total_size: int) -> None: + if not check_zk_node(ctx, path): + create_zk_nodes(ctx, [path], make_parents=True) + state_data = json.dumps({ORPHANED_OBJECTS_SIZE_FIELD: total_size}, indent=4) + update_zk_nodes(ctx, [path], state_data.encode("utf-8")) + + +def _store_state_local_save(_: Context, total_size: int) -> None: + with open(STATE_LOCAL_PATH, "w", encoding="utf-8") as file: + json.dump({ORPHANED_OBJECTS_SIZE_FIELD: total_size}, file, indent=4) + + def _print_response(ctx: Context, dry_run: bool, deleted: int, total_size: int) -> None: """ Outputs result of cleaning. diff --git a/ch_tools/monrun_checks/ch_orphaned_objects.py b/ch_tools/monrun_checks/ch_orphaned_objects.py index 72aec362..36c0f03b 100644 --- a/ch_tools/monrun_checks/ch_orphaned_objects.py +++ b/ch_tools/monrun_checks/ch_orphaned_objects.py @@ -4,12 +4,24 @@ from ch_tools.chadmin.cli.object_storage_group import ( ORPHANED_OBJECTS_SIZE_FIELD, - STORE_STATE_PATH, + STATE_LOCAL_PATH, ) +from ch_tools.chadmin.internal.zookeeper import check_zk_node, get_zk_node from ch_tools.common.result import CRIT, OK, WARNING, Result @click.command("orphaned-objects") +@click.option( + "--state-local", + "state_local", + is_flag=True, + help="Get total size of orphaned objects from local file.", +) +@click.option( + "--state-zk-path", + "state_zk_path", + help="Zookeeper node path from which the total size of orphaned objects will be taken.", +) @click.option( "-c", "--critical", @@ -28,18 +40,55 @@ ) @click.pass_context def orphaned_objects_command( - _: click.Context, + ctx: click.Context, + state_local: bool, + state_zk_path: str, crit: int, warn: int, ) -> Result: - try: - with open(STORE_STATE_PATH, mode="r", encoding="utf-8") as file: - total_size = json.load(file).get(ORPHANED_OBJECTS_SIZE_FIELD) - except FileNotFoundError: - total_size = 0 + _check_mutually_exclusive(state_local, state_zk_path) + + total_size = 0 + if state_zk_path: + total_size = _zk_get_total_size(ctx, state_zk_path) + + if state_local: + total_size = _local_get_total_size() + msg = f"Total size: {total_size}" if total_size >= crit: return Result(CRIT, msg) if total_size >= warn: return Result(WARNING, msg) return Result(OK, msg) + + +def _check_mutually_exclusive(state_local, state_zk_path): + if not state_local and not state_zk_path: + raise click.UsageError( + "One of these options must be provided: --state-local, --state-zk-path" + ) + + if state_local and state_zk_path: + raise click.UsageError( + "Options --state-local and --state-zk-path are mutually exclusive." + ) + + +def _local_get_total_size() -> int: + try: + with open(STATE_LOCAL_PATH, mode="r", encoding="utf-8") as file: + total_size = json.load(file).get(ORPHANED_OBJECTS_SIZE_FIELD) + except FileNotFoundError: + total_size = 0 + + return total_size + + +def _zk_get_total_size(ctx: click.Context, state_zk_path: str) -> int: + total_size = 0 + if check_zk_node(ctx, state_zk_path): + total_size = json.loads(get_zk_node(ctx, state_zk_path)).get( + ORPHANED_OBJECTS_SIZE_FIELD + ) + return total_size diff --git a/tests/features/monrun.feature b/tests/features/monrun.feature index 71358566..0864f51c 100644 --- a/tests/features/monrun.feature +++ b/tests/features/monrun.feature @@ -389,10 +389,14 @@ Feature: ch-monitoring tool 2;KazooTimeoutError('Connection time-out') """ - Scenario: Check CH store state + Scenario: Check clickhouse orphaned objects with state-zk-path option When we execute command on clickhouse01 """ - ch-monitoring orphaned-objects + chadmin object-storage clean --dry-run --to-time 0h --on-cluster --keep-paths --store-state-zk-path /tmp/shard_1 + """ + When we execute command on clickhouse01 + """ + ch-monitoring orphaned-objects --state-zk-path /tmp/shard_1 """ Then we get response """ @@ -406,11 +410,11 @@ Feature: ch-monitoring tool """ When we execute command on clickhouse01 """ - chadmin object-storage clean --dry-run --to-time 0h --on-cluster --keep-paths --store-state + chadmin object-storage clean --dry-run --to-time 0h --on-cluster --keep-paths --store-state-zk-path /tmp/shard_1 """ When we execute command on clickhouse01 """ - ch-monitoring orphaned-objects + ch-monitoring orphaned-objects --state-zk-path /tmp/shard_1 """ Then we get response contains """ @@ -418,7 +422,7 @@ Feature: ch-monitoring tool """ When we execute command on clickhouse01 """ - ch-monitoring orphaned-objects -w 9 -c 19 + ch-monitoring orphaned-objects -w 9 -c 19 --state-zk-path /tmp/shard_1 """ Then we get response contains """ @@ -426,9 +430,75 @@ Feature: ch-monitoring tool """ When we execute command on clickhouse01 """ - ch-monitoring orphaned-objects -w 4 -c 9 + ch-monitoring orphaned-objects -w 4 -c 9 --state-zk-path /tmp/shard_1 """ Then we get response contains """ 2;Total size: 10 """ + + Scenario: Check clickhouse orphaned objects with state-local option + When we execute command on clickhouse01 + """ + chadmin object-storage clean --dry-run --to-time 0h --on-cluster --keep-paths --store-state-local + """ + When we execute command on clickhouse01 + """ + ch-monitoring orphaned-objects --state-local + """ + Then we get response + """ + 0;Total size: 0 + """ + When we put object in S3 + """ + bucket: cloud-storage-test + path: /data/orpaned_object.tsv + data: '1234567890' + """ + When we execute command on clickhouse01 + """ + chadmin object-storage clean --dry-run --to-time 0h --on-cluster --keep-paths --store-state-local + """ + When we execute command on clickhouse01 + """ + ch-monitoring orphaned-objects --state-local + """ + Then we get response contains + """ + 0;Total size: 10 + """ + When we execute command on clickhouse01 + """ + ch-monitoring orphaned-objects -w 9 -c 19 --state-local + """ + Then we get response contains + """ + 1;Total size: 10 + """ + When we execute command on clickhouse01 + """ + ch-monitoring orphaned-objects -w 4 -c 9 --state-local + """ + Then we get response contains + """ + 2;Total size: 10 + """ + + Scenario: Check clickhouse orphaned objects --state-local and --state-zk-path are mutually exclusive + When we execute command on clickhouse01 + """ + ch-monitoring orphaned-objects -w 9 -c 19 --state-local --state-zk-path /tmp/shard_1 + """ + Then we get response contains + """ + 1;Unknown error: Options --state-local and --state-zk-path are mutually exclusive. + """ + When we execute command on clickhouse01 + """ + ch-monitoring orphaned-objects -w 9 -c 19 + """ + Then we get response contains + """ + 1;Unknown error: One of these options must be provided: --state-local, --state-zk-path + """ diff --git a/tests/features/object_storage.feature b/tests/features/object_storage.feature index adfe9620..c9b2a80e 100644 --- a/tests/features/object_storage.feature +++ b/tests/features/object_storage.feature @@ -170,10 +170,38 @@ Feature: chadmin object-storage commands TotalSize: 3 """ - Scenario: Clean with flag store-state works + Scenario: Clean with store-state-zk-path works When we execute command on clickhouse01 """ - chadmin --format yaml object-storage clean --dry-run --to-time 0h --on-cluster --keep-paths --store-state + chadmin --format yaml object-storage clean --dry-run --to-time 0h --on-cluster --keep-paths --store-state-zk-path /tmp/shard_1 + """ + Then we get zookeeper node with "/tmp/shard_1" path + """ + { + "orphaned_objects_size": 0 + } + """ + When we put object in S3 + """ + bucket: cloud-storage-test + path: /data/orpaned_object.tsv + data: '1234567890' + """ + When we execute command on clickhouse01 + """ + chadmin --format yaml object-storage clean --dry-run --to-time 0h --on-cluster --keep-paths --store-state-zk-path /tmp/shard_1 + """ + Then we get zookeeper node with "/tmp/shard_1" path + """ + { + "orphaned_objects_size": 10 + } + """ + + Scenario: Clean with store-state-local works + When we execute command on clickhouse01 + """ + chadmin --format yaml object-storage clean --dry-run --to-time 0h --on-cluster --keep-paths --store-state-local """ Then we get file /tmp/object_storage_cleanup_state.json """ @@ -189,7 +217,7 @@ Feature: chadmin object-storage commands """ When we execute command on clickhouse01 """ - chadmin --format yaml object-storage clean --dry-run --to-time 0h --on-cluster --keep-paths --store-state + chadmin --format yaml object-storage clean --dry-run --to-time 0h --on-cluster --keep-paths --store-state-local """ Then we get file /tmp/object_storage_cleanup_state.json """ diff --git a/tests/steps/zookeeper.py b/tests/steps/zookeeper.py index 40a6989d..413f01d2 100644 --- a/tests/steps/zookeeper.py +++ b/tests/steps/zookeeper.py @@ -4,7 +4,7 @@ import os -from behave import given +from behave import given, then from kazoo.client import KazooClient from modules.docker import get_container, get_exposed_port from tenacity import retry, stop_after_attempt, wait_fixed @@ -64,6 +64,19 @@ def recursive_remove_node_data(zk_client, path, node): client.stop() +@then('we get zookeeper node with "{path}" path') +def step_get_zk_node(context, path): + client = _zk_client(context) + + try: + client.start() + result = client.get(path)[0].decode().strip() + finally: + client.stop() + + print(result) + + def _zk_client(context, instance_name="zookeeper01", port=2181, use_ssl=False): logging.set_module_log_level("kazoo", logging.CRITICAL)