Skip to content

Commit

Permalink
[BZ-2249640] verify nodeAffinity for cephtoolbox pod (#9973)
Browse files Browse the repository at this point in the history
Signed-off-by: nagendra202 <[email protected]>
  • Loading branch information
nagendra202 authored Aug 29, 2024
1 parent 60f0479 commit af214a6
Show file tree
Hide file tree
Showing 2 changed files with 200 additions and 0 deletions.
78 changes: 78 additions & 0 deletions ocs_ci/ocs/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -3006,3 +3006,81 @@ def get_node_by_internal_ip(internal_ip):
return n

return None


def get_worker_node_where_ceph_toolbox_not_running():
    """
    List the worker nodes that are NOT hosting the ceph toolbox pod.

    Looks up the node currently running the ceph toolbox pod and filters it
    out of the full set of worker nodes.

    Returns:
        list: worker node names excluding the node where the ceph toolbox
            pod is currently running

    """
    toolbox_node = get_ceph_tools_running_node()
    all_workers = get_worker_nodes()
    log.info(
        f"List of all worker nodes available in the cluster currently {all_workers}"
    )
    # Every worker except the one the toolbox pod is scheduled on
    remaining_workers = list(filter(lambda wn: wn != toolbox_node, all_workers))
    log.info(
        f"List of worker nodes where ceph tools pod is not running: {remaining_workers}"
    )
    return remaining_workers


def apply_node_affinity_for_ceph_toolbox(node_name):
    """
    Apply node affinity for ceph toolbox pod.

    Patches the storage cluster's toolbox placement with a required
    nodeAffinity pinned to the given node, waits for the existing toolbox
    pod to be deleted, and verifies the new pod is scheduled on that node.

    Args:
        node_name (str): Node name which needs to be added in the node affinity

    Returns:
        bool: True if node affinity applied successfully and the toolbox pod
            failed over to the given node, False otherwise

    """
    resource_name = constants.DEFAULT_CLUSTERNAME
    if config.DEPLOYMENT["external_mode"]:
        resource_name = constants.DEFAULT_CLUSTERNAME_EXTERNAL_MODE

    storagecluster_obj = ocp.OCP(
        resource_name=resource_name,
        namespace=config.ENV_DATA["cluster_namespace"],
        kind=constants.STORAGECLUSTER,
    )
    # requiredDuringSchedulingIgnoredDuringExecution: pod can only be
    # scheduled on the node whose hostname matches node_name
    nodeaffinity = (
        f'{{"toolbox": {{"nodeAffinity": {{"requiredDuringSchedulingIgnoredDuringExecution": '
        f'{{"nodeSelectorTerms": [{{"matchExpressions": [{{"key": "kubernetes.io/hostname",'
        f'"operator": "In",'
        f'"values": ["{node_name}"]}}]}}]}}}}}}}}'
    )
    param = f'{{"spec": {{"placement": {nodeaffinity}}}}}'
    # Capture the current toolbox pod name before patching so we can wait
    # for it to be deleted (the operator recreates it with the new placement)
    ct_pod = pod.get_ceph_tools_pod(skip_creating_pod=True)
    ct_pod_name = ct_pod.name
    storagecluster_obj.patch(params=param, format_type="merge")
    log.info(
        f"Successfully applied node affinity for ceph toolbox pod with {node_name}"
    )
    ct_pod.ocp.wait_for_delete(ct_pod_name)
    log.info(
        "Identify on which node the ceph toolbox is running after failover due to node affinity"
    )
    ct_new_pod_running_node_name = get_ceph_tools_running_node()
    if node_name == ct_new_pod_running_node_name:
        log.info(
            f"ceph toolbox pod failovered to the new node {ct_new_pod_running_node_name}"
            f" given in node affinity successfully "
        )
        return True
    # Previously this fell through and implicitly returned None; make the
    # failure explicit so callers asserting on the result get a clear signal
    log.error(
        f"ceph toolbox pod is running on {ct_new_pod_running_node_name}"
        f" instead of the node {node_name} given in node affinity"
    )
    return False


def get_ceph_tools_running_node():
    """
    Get node name where the ceph tools pod is currently running

    Returns:
        str: name of the node where ceph tools is running

    """
    # wait=True ensures the pod object is fetched once it exists;
    # skip_creating_pod avoids triggering pod creation from here
    toolbox_pod = pod.get_ceph_tools_pod(wait=True, skip_creating_pod=True)
    return toolbox_pod.data["spec"].get("nodeName")
122 changes: 122 additions & 0 deletions tests/functional/pod_and_daemons/test_cephtoolbox_pod_nodeaffinity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
import logging
import pytest
import random

from ocs_ci.framework import config
from ocs_ci.framework.pytest_customization.marks import bugzilla, brown_squad
from ocs_ci.framework.testlib import tier1, tier4b, polarion_id
from ocs_ci.ocs import ocp, constants
from ocs_ci.ocs.node import (
apply_node_affinity_for_ceph_toolbox,
check_taint_on_nodes,
drain_nodes,
get_ceph_tools_running_node,
get_worker_nodes,
get_worker_node_where_ceph_toolbox_not_running,
schedule_nodes,
taint_nodes,
unschedule_nodes,
untaint_nodes,
)
from ocs_ci.ocs.resources.pod import wait_for_pods_to_be_running

log = logging.getLogger(__name__)


@brown_squad
@bugzilla("2249640")
class TestCephtoolboxPod:
    """
    Verify nodeAffinity handling for the ceph toolbox pod (BZ-2249640):
    the pod must run only on the node specified in the storage cluster's
    toolbox placement, including across reboots/drains and on tainted nodes.
    """

    @pytest.fixture(scope="session", autouse=True)
    def teardown(self, request):
        def finalizer():
            """
            Finalizer will take care of below activities:
            1. Untaint the nodes: remove taints from nodes
            2. Removes nodeaffinity to bring storage cluster with default values.
            """
            if check_taint_on_nodes():
                untaint_nodes()
            resource_name = constants.DEFAULT_CLUSTERNAME
            if config.DEPLOYMENT["external_mode"]:
                resource_name = constants.DEFAULT_CLUSTERNAME_EXTERNAL_MODE
            storagecluster_obj = ocp.OCP(
                resource_name=resource_name,
                namespace=config.ENV_DATA["cluster_namespace"],
                kind=constants.STORAGECLUSTER,
            )
            # JSON-Patch removing the toolbox placement added by the tests.
            # NOTE: the trailing comma inside the array ('...},]') was invalid
            # JSON and would make the patch fail; it has been removed.
            params = '[{"op": "remove", "path": "/spec/placement/toolbox"}]'
            storagecluster_obj.patch(params=params, format_type="json")
            log.info("Patched storage cluster back to the default")
            assert (
                wait_for_pods_to_be_running()
            ), "some of the pods didn't came up running"

        request.addfinalizer(finalizer)

    @tier1
    @polarion_id("OCS-6086")
    def test_node_affinity_to_ceph_toolbox_pod(self):
        """
        This test verifies whether ceph toolbox failovered or not after applying node affinity
        """
        other_nodes = get_worker_node_where_ceph_toolbox_not_running()
        # Pick any worker that is not currently hosting the toolbox pod
        other_node_name = random.choice(other_nodes)
        log.info(
            "Apply node affinity with a node name other than currently running node."
        )
        assert apply_node_affinity_for_ceph_toolbox(
            other_node_name
        ), "Failed to apply node affinity for the Ceph toolbox on the specified node."

    @tier4b
    @polarion_id("OCS-6087")
    def test_reboot_node_affinity_node(self):
        """
        This test verifies ceph toolbox runs only on the node given in node-affinity.
        Reboot the node after applying node-affinity.
        Expectation is the pod should come up only on that node mentioned in affinity.
        """
        other_nodes = get_worker_node_where_ceph_toolbox_not_running()
        node_name = random.choice(other_nodes)
        apply_node_affinity_for_ceph_toolbox(node_name)
        log.info("Unschedule ceph tools pod running node.")
        unschedule_nodes([node_name])
        log.info(f"node {node_name} unscheduled successfully")
        drain_nodes([node_name])
        log.info(f"node {node_name} drained successfully")
        schedule_nodes([node_name])
        log.info(f"Scheduled the node {node_name}")
        log.info("Identify on which node the ceph toolbox is running after node drain")
        ct_pod_running_node_name = get_ceph_tools_running_node()
        if node_name == ct_pod_running_node_name:
            log.info(
                f"ceph toolbox pod is running on a node {node_name} which is in node-affinity"
            )
        else:
            log.error(
                f"Ceph toolbox pod is not running on the nodeAffinity given node {node_name}."
            )
            assert False, "Ceph toolbox pod is not on the expected node."

    @tier4b
    @polarion_id("OCS-6090")
    def test_nodeaffinity_to_ceph_toolbox_with_default_taints(self):
        """
        This test verifies whether ceph toolbox failovered or not after applying node affinity on tainted node
        """
        worker_nodes = get_worker_nodes()
        log.info(f"Current available worker nodes are {worker_nodes}")
        # Taint all workers first; the toolbox placement must still win
        taint_nodes(worker_nodes)
        log.info("Applied default taints on all the worker nodes")
        other_nodes = get_worker_node_where_ceph_toolbox_not_running()
        other_node_name = random.choice(other_nodes)
        log.info(
            "Apply node affinity with a node name other than currently running node."
        )
        assert apply_node_affinity_for_ceph_toolbox(
            other_node_name
        ), "Failed to apply nodeaffinity with default taints"

0 comments on commit af214a6

Please sign in to comment.