Skip to content

Commit

Permalink
Store state data in zookeeper (#219)
Browse files Browse the repository at this point in the history
* Added store stats in zk

* monrun check rename new options

---------

Co-authored-by: Nikita Unisikhin <[email protected]>
  • Loading branch information
NikitaUnisikhin and Nikita Unisikhin authored Aug 11, 2024
1 parent 2124e28 commit 2455a54
Show file tree
Hide file tree
Showing 5 changed files with 210 additions and 24 deletions.
40 changes: 33 additions & 7 deletions ch_tools/chadmin/cli/object_storage_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@
from humanfriendly import format_size

from ch_tools.chadmin.cli.chadmin_group import Chadmin
from ch_tools.chadmin.internal.zookeeper import (
check_zk_node,
create_zk_nodes,
update_zk_nodes,
)
from ch_tools.common.cli.formatting import print_response
from ch_tools.common.cli.parameters import TimeSpanParamType
from ch_tools.common.clickhouse.config import get_clickhouse_config
Expand All @@ -14,8 +19,8 @@
# Use big enough timeout for stream HTTP query
STREAM_TIMEOUT = 10 * 60

STORE_STATE_PATH = "/tmp/object_storage_cleanup_state.json"
ORPHANED_OBJECTS_SIZE_FIELD = "orphaned_objects_size"
STATE_LOCAL_PATH = "/tmp/object_storage_cleanup_state.json"


@group("object-storage", cls=Chadmin)
Expand Down Expand Up @@ -101,11 +106,17 @@ def object_storage_group(ctx: Context, disk_name: str) -> None:
help=("Use saved object list without traversing object storage again."),
)
@option(
"--store-state",
"store_state",
"--store-state-local",
"store_state_local",
is_flag=True,
help=("Write total size of orphaned objects to log file."),
)
@option(
"--store-state-zk-path",
"store_state_zk_path",
default="",
help=("Zookeeper node path for storage total size of orphaned objects."),
)
@pass_context
def clean_command(
ctx: Context,
Expand All @@ -117,7 +128,8 @@ def clean_command(
dry_run: bool,
keep_paths: bool,
use_saved_list: bool,
store_state: bool,
store_state_local: bool,
store_state_zk_path: str,
) -> None:
"""
Clean orphaned S3 objects.
Expand All @@ -134,13 +146,27 @@ def clean_command(
use_saved_list,
)

if store_state:
with open(STORE_STATE_PATH, "w", encoding="utf-8") as file:
json.dump({ORPHANED_OBJECTS_SIZE_FIELD: total_size}, file, indent=4)
if store_state_zk_path:
_store_state_zk_save(ctx, store_state_zk_path, total_size)

if store_state_local:
_store_state_local_save(ctx, total_size)

_print_response(ctx, dry_run, deleted, total_size)


def _store_state_zk_save(ctx: Context, path: str, total_size: int) -> None:
if not check_zk_node(ctx, path):
create_zk_nodes(ctx, [path], make_parents=True)
state_data = json.dumps({ORPHANED_OBJECTS_SIZE_FIELD: total_size}, indent=4)
update_zk_nodes(ctx, [path], state_data.encode("utf-8"))


def _store_state_local_save(_: Context, total_size: int) -> None:
with open(STATE_LOCAL_PATH, "w", encoding="utf-8") as file:
json.dump({ORPHANED_OBJECTS_SIZE_FIELD: total_size}, file, indent=4)


def _print_response(ctx: Context, dry_run: bool, deleted: int, total_size: int) -> None:
"""
Outputs result of cleaning.
Expand Down
63 changes: 56 additions & 7 deletions ch_tools/monrun_checks/ch_orphaned_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,24 @@

from ch_tools.chadmin.cli.object_storage_group import (
ORPHANED_OBJECTS_SIZE_FIELD,
STORE_STATE_PATH,
STATE_LOCAL_PATH,
)
from ch_tools.chadmin.internal.zookeeper import check_zk_node, get_zk_node
from ch_tools.common.result import CRIT, OK, WARNING, Result


@click.command("orphaned-objects")
@click.option(
"--state-local",
"state_local",
is_flag=True,
help="Get total size of orphaned objects from local file.",
)
@click.option(
"--state-zk-path",
"state_zk_path",
help="Zookeeper node path from which the total size of orphaned objects will be taken.",
)
@click.option(
"-c",
"--critical",
Expand All @@ -28,18 +40,55 @@
)
@click.pass_context
def orphaned_objects_command(
_: click.Context,
ctx: click.Context,
state_local: bool,
state_zk_path: str,
crit: int,
warn: int,
) -> Result:
try:
with open(STORE_STATE_PATH, mode="r", encoding="utf-8") as file:
total_size = json.load(file).get(ORPHANED_OBJECTS_SIZE_FIELD)
except FileNotFoundError:
total_size = 0
_check_mutually_exclusive(state_local, state_zk_path)

total_size = 0
if state_zk_path:
total_size = _zk_get_total_size(ctx, state_zk_path)

if state_local:
total_size = _local_get_total_size()

msg = f"Total size: {total_size}"
if total_size >= crit:
return Result(CRIT, msg)
if total_size >= warn:
return Result(WARNING, msg)
return Result(OK, msg)


def _check_mutually_exclusive(state_local, state_zk_path):
if not state_local and not state_zk_path:
raise click.UsageError(
"One of these options must be provided: --state-local, --state-zk-path"
)

if state_local and state_zk_path:
raise click.UsageError(
"Options --state-local and --state-zk-path are mutually exclusive."
)


def _local_get_total_size() -> int:
try:
with open(STATE_LOCAL_PATH, mode="r", encoding="utf-8") as file:
total_size = json.load(file).get(ORPHANED_OBJECTS_SIZE_FIELD)
except FileNotFoundError:
total_size = 0

return total_size


def _zk_get_total_size(ctx: click.Context, state_zk_path: str) -> int:
total_size = 0
if check_zk_node(ctx, state_zk_path):
total_size = json.loads(get_zk_node(ctx, state_zk_path)).get(
ORPHANED_OBJECTS_SIZE_FIELD
)
return total_size
82 changes: 76 additions & 6 deletions tests/features/monrun.feature
Original file line number Diff line number Diff line change
Expand Up @@ -389,10 +389,14 @@ Feature: ch-monitoring tool
2;KazooTimeoutError('Connection time-out')
"""

Scenario: Check CH store state
Scenario: Check clickhouse orphaned objects with state-zk-path option
When we execute command on clickhouse01
"""
ch-monitoring orphaned-objects
chadmin object-storage clean --dry-run --to-time 0h --on-cluster --keep-paths --store-state-zk-path /tmp/shard_1
"""
When we execute command on clickhouse01
"""
ch-monitoring orphaned-objects --state-zk-path /tmp/shard_1
"""
Then we get response
"""
Expand All @@ -406,29 +410,95 @@ Feature: ch-monitoring tool
"""
When we execute command on clickhouse01
"""
chadmin object-storage clean --dry-run --to-time 0h --on-cluster --keep-paths --store-state
chadmin object-storage clean --dry-run --to-time 0h --on-cluster --keep-paths --store-state-zk-path /tmp/shard_1
"""
When we execute command on clickhouse01
"""
ch-monitoring orphaned-objects
ch-monitoring orphaned-objects --state-zk-path /tmp/shard_1
"""
Then we get response contains
"""
0;Total size: 10
"""
When we execute command on clickhouse01
"""
ch-monitoring orphaned-objects -w 9 -c 19
ch-monitoring orphaned-objects -w 9 -c 19 --state-zk-path /tmp/shard_1
"""
Then we get response contains
"""
1;Total size: 10
"""
When we execute command on clickhouse01
"""
ch-monitoring orphaned-objects -w 4 -c 9
ch-monitoring orphaned-objects -w 4 -c 9 --state-zk-path /tmp/shard_1
"""
Then we get response contains
"""
2;Total size: 10
"""

Scenario: Check clickhouse orphaned objects with state-local option
When we execute command on clickhouse01
"""
chadmin object-storage clean --dry-run --to-time 0h --on-cluster --keep-paths --store-state-local
"""
When we execute command on clickhouse01
"""
ch-monitoring orphaned-objects --state-local
"""
Then we get response
"""
0;Total size: 0
"""
When we put object in S3
"""
bucket: cloud-storage-test
path: /data/orpaned_object.tsv
data: '1234567890'
"""
When we execute command on clickhouse01
"""
chadmin object-storage clean --dry-run --to-time 0h --on-cluster --keep-paths --store-state-local
"""
When we execute command on clickhouse01
"""
ch-monitoring orphaned-objects --state-local
"""
Then we get response contains
"""
0;Total size: 10
"""
When we execute command on clickhouse01
"""
ch-monitoring orphaned-objects -w 9 -c 19 --state-local
"""
Then we get response contains
"""
1;Total size: 10
"""
When we execute command on clickhouse01
"""
ch-monitoring orphaned-objects -w 4 -c 9 --state-local
"""
Then we get response contains
"""
2;Total size: 10
"""

Scenario: Check clickhouse orphaned objects --state-local and --state-zk-path are mutually exclusive
When we execute command on clickhouse01
"""
ch-monitoring orphaned-objects -w 9 -c 19 --state-local --state-zk-path /tmp/shard_1
"""
Then we get response contains
"""
1;Unknown error: Options --state-local and --state-zk-path are mutually exclusive.
"""
When we execute command on clickhouse01
"""
ch-monitoring orphaned-objects -w 9 -c 19
"""
Then we get response contains
"""
1;Unknown error: One of these options must be provided: --state-local, --state-zk-path
"""
34 changes: 31 additions & 3 deletions tests/features/object_storage.feature
Original file line number Diff line number Diff line change
Expand Up @@ -170,10 +170,38 @@ Feature: chadmin object-storage commands
TotalSize: 3
"""

Scenario: Clean with flag store-state works
Scenario: Clean with store-state-zk-path works
When we execute command on clickhouse01
"""
chadmin --format yaml object-storage clean --dry-run --to-time 0h --on-cluster --keep-paths --store-state
chadmin --format yaml object-storage clean --dry-run --to-time 0h --on-cluster --keep-paths --store-state-zk-path /tmp/shard_1
"""
Then we get zookeeper node with "/tmp/shard_1" path
"""
{
"orphaned_objects_size": 0
}
"""
When we put object in S3
"""
bucket: cloud-storage-test
path: /data/orpaned_object.tsv
data: '1234567890'
"""
When we execute command on clickhouse01
"""
chadmin --format yaml object-storage clean --dry-run --to-time 0h --on-cluster --keep-paths --store-state-zk-path /tmp/shard_1
"""
Then we get zookeeper node with "/tmp/shard_1" path
"""
{
"orphaned_objects_size": 10
}
"""

Scenario: Clean with store-state-local works
When we execute command on clickhouse01
"""
chadmin --format yaml object-storage clean --dry-run --to-time 0h --on-cluster --keep-paths --store-state-local
"""
Then we get file /tmp/object_storage_cleanup_state.json
"""
Expand All @@ -189,7 +217,7 @@ Feature: chadmin object-storage commands
"""
When we execute command on clickhouse01
"""
chadmin --format yaml object-storage clean --dry-run --to-time 0h --on-cluster --keep-paths --store-state
chadmin --format yaml object-storage clean --dry-run --to-time 0h --on-cluster --keep-paths --store-state-local
"""
Then we get file /tmp/object_storage_cleanup_state.json
"""
Expand Down
15 changes: 14 additions & 1 deletion tests/steps/zookeeper.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import os

from behave import given
from behave import given, then
from kazoo.client import KazooClient
from modules.docker import get_container, get_exposed_port
from tenacity import retry, stop_after_attempt, wait_fixed
Expand Down Expand Up @@ -64,6 +64,19 @@ def recursive_remove_node_data(zk_client, path, node):
client.stop()


@then('we get zookeeper node with "{path}" path')
def step_get_zk_node(context, path):
client = _zk_client(context)

try:
client.start()
result = client.get(path)[0].decode().strip()
finally:
client.stop()

print(result)


def _zk_client(context, instance_name="zookeeper01", port=2181, use_ssl=False):
logging.set_module_log_level("kazoo", logging.CRITICAL)

Expand Down

0 comments on commit 2455a54

Please sign in to comment.