diff --git a/hotsos/core/host_helpers/__init__.py b/hotsos/core/host_helpers/__init__.py
index bb6b8e894..d8c0c7493 100644
--- a/hotsos/core/host_helpers/__init__.py
+++ b/hotsos/core/host_helpers/__init__.py
@@ -13,6 +13,9 @@
     DockerImageHelper,
     SnapPackageHelper,
 )
+from .pebble import (  # noqa: F403,F401
+    PebbleHelper,
+)
 from .ssl import (  # noqa: F403,F401
     SSLCertificate,
     SSLCertificatesHelper,
diff --git a/hotsos/core/host_helpers/cli.py b/hotsos/core/host_helpers/cli.py
index cb672be7d..de036fb7b 100644
--- a/hotsos/core/host_helpers/cli.py
+++ b/hotsos/core/host_helpers/cli.py
@@ -111,8 +111,10 @@ def reset_command_inner(self, *args, **kwargs):
 
 
 class CmdBase(object):
+    TYPE = None
 
     def __init__(self):
+        self.original_cmd = None
         self.hooks = {}
         self.reset()
 
@@ -142,6 +144,9 @@ def register_hook(self, name, f):
         """
         self.hooks[name] = f
 
+    def __repr__(self):
+        return "type={}, cmd={}".format(self.TYPE, self.original_cmd)
+
 
 class BinCmd(CmdBase):
     TYPE = "BIN"
@@ -513,6 +518,7 @@ def __call__(self, *args, **kwargs):
             return NullSource()()
 
         # binary sources only apply if data_root is system root
+        bin_out = None
         for bsource in [s for s in self.sources if s.TYPE == "BIN"]:
             cache = False
             # NOTE: we currently only support caching commands with no
@@ -524,18 +530,19 @@ def __call__(self, *args, **kwargs):
                     return out
 
             try:
-                out = bsource(*args, **kwargs)
+                bin_out = bsource(*args, **kwargs)
+                if cache and bin_out is not None:
+                    try:
+                        self.cache.save(self.cmdkey, bin_out)
+                    except pickle.PicklingError as exc:
+                        log.info("unable to cache command '%s' output: %s",
+                                 self.cmdkey, exc)
+
+                break
             except CLIExecError as exc:
-                return exc.return_value
-
-            if cache and out is not None:
-                try:
-                    self.cache.save(self.cmdkey, out)
-                except pickle.PicklingError as exc:
-                    log.info("unable to cache command '%s' output: %s",
-                             self.cmdkey, exc)
+                bin_out = exc.return_value
 
-            return out
+        return bin_out
 
 
 class CLICacheWrapper(object):
@@ -837,6 +844,14 @@ def command_catalog(self):
             'pacemaker_crm_status':
                 [BinCmd('crm status'),
                  FileCmd('sos_commands/pacemaker/crm_status')],
+            'pebble_services':
+                [BinCmd('pebble services'),
+                 # This is how operator charms run it
+                 BinCmd('/charm/bin/pebble services'),
+                 # this does not exist in sos yet but adding since it is useful
+                 # for testing and will hopefully be supported in sos at some
+                 # point.
+                 FileCmd('sos_commands/pebble/pebble_services')],
             'ps':
                 [BinCmd('ps auxwww'),
                  FileCmd('ps')],
diff --git a/hotsos/core/host_helpers/pebble.py b/hotsos/core/host_helpers/pebble.py
new file mode 100644
index 000000000..9c230f899
--- /dev/null
+++ b/hotsos/core/host_helpers/pebble.py
@@ -0,0 +1,150 @@
+import re
+
+from hotsos.core.log import log
+from hotsos.core.factory import FactoryBase
+from hotsos.core.host_helpers import CLIHelper
+from hotsos.core.utils import cached_property, sorted_dict
+
+# Templates used to identify a service binary in a line of ps output. Each is
+# formatted with a service name expression before being compiled.
+SVC_EXPR_TEMPLATES = {
+    "absolute": r".+\S+bin/({})(?:\s+.+|$)",
+    "snap": r".+\S+\d+/({})(?:\s+.+|$)",
+    "relative": r".+\s({})(?:\s+.+|$)",
+    }
+
+
+class PebbleService(object):
+    """ Representation of a pebble service and its state. """
+
+    def __init__(self, name, state):
+        self.name = name
+        self.state = state
+
+    def __repr__(self):
+        return "name={}, state={}".format(self.name, self.state)
+
+
+class PebbleHelper(object):
+    """ Helper class used to query pebble services. """
+
+    def __init__(self, service_exprs, ps_allow_relative=True):
+        """
+        @param service_exprs: list of python.re expressions used to match
+                              service names.
+        @param ps_allow_relative: whether to allow commands to be identified
+                                  from ps as run using a relative binary
+                                  path e.g. mycmd as opposed to /bin/mycmd.
+        """
+        self._ps_allow_relative = ps_allow_relative
+        self._service_exprs = set(service_exprs)
+
+    @cached_property
+    def services(self):
+        """
+        Return a dict of identified pebble services and their state.
+
+        Each line of 'pebble services' output is matched against every
+        service expression. The first column provides the service name and
+        the third column (Current) provides its state.
+        """
+        # compile each expression once rather than once per line of output.
+        exprs = [re.compile(r"({})\s+\S+\s+(\S+)\s+.*".format(expr))
+                 for expr in self._service_exprs]
+        _services = {}
+        for line in CLIHelper().pebble_services():
+            for expr in exprs:
+                ret = expr.match(line)
+                if not ret:
+                    continue
+
+                name = ret.group(1)
+                _services[name] = PebbleService(name, ret.group(2))
+
+        return _services
+
+    def get_process_cmd_from_line(self, line, expr):
+        """
+        Search a line of ps output for a process binary matching expr.
+
+        @param line: line of ps output.
+        @param expr: python.re expression used to match the binary name.
+        @return: name of the matched binary or None if there is no match.
+        """
+        for expr_type, expr_tmplt in SVC_EXPR_TEMPLATES.items():
+            if expr_type == 'relative' and not self._ps_allow_relative:
+                continue
+
+            ret = re.compile(expr_tmplt.format(expr)).match(line)
+            if ret:
+                svc = ret.group(1)
+                log.debug("matched process %s with %s expr", svc,
+                          expr_type)
+                return svc
+
+        return None
+
+    @cached_property
+    def processes(self):
+        """
+        Identify running processes from ps that are associated with resolved
+        pebble services. The same search pattern used for identifying pebble
+        services is used to match the process binary name.
+
+        Returns a dictionary of process names along with the number of each.
+        """
+        _proc_info = {}
+        for line in CLIHelper().ps():
+            for expr in self._service_exprs:
+                # Look for a running process with this name. We need to
+                # account for different types of process binary e.g.
+                #
+                #   /snap/<name>/1830/<svc>
+                #   /usr/bin/<svc>
+                #
+                # and filter e.g.
+                #
+                #   /var/lib/<svc> and /var/log/<svc>
+                cmd = self.get_process_cmd_from_line(line, expr)
+                if cmd:
+                    _proc_info[cmd] = _proc_info.get(cmd, 0) + 1
+
+        return _proc_info
+
+    @property
+    def _service_info(self):
+        """Return a dictionary of pebble services grouped by state. """
+        info = {}
+        for svc, obj in sorted_dict(self.services).items():
+            info.setdefault(obj.state, []).append(svc)
+
+        return info
+
+    @property
+    def _process_info(self):
+        """Return a list of processes associated with services. """
+        return ["{} ({})".format(name, count)
+                for name, count in sorted_dict(self.processes).items()]
+
+    @property
+    def summary(self):
+        """
+        Output a dict summary of this class i.e. services, their state and any
+        processes run by them.
+        """
+        return {'pebble': self._service_info,
+                'ps': self._process_info}
+
+
+class ServiceFactory(FactoryBase):
+    """
+    Factory to dynamically create PebbleService objects for given services.
+
+    Service objects are returned when a getattr() is done on this object using
+    the name of the service as the attr name. None is returned if no service
+    with that name is found.
+    """
+
+    def __getattr__(self, svc):
+        log.debug("creating service object for %s", svc)
+        return PebbleHelper([svc]).services.get(svc)
diff --git a/hotsos/core/plugins/openstack/common.py b/hotsos/core/plugins/openstack/common.py
index 5e9f6af50..4cf6ab2b2 100644
--- a/hotsos/core/plugins/openstack/common.py
+++ b/hotsos/core/plugins/openstack/common.py
@@ -20,6 +20,7 @@
     APTPackageHelper,
     DockerImageHelper,
     DPKGVersionCompare,
+    PebbleHelper,
     SystemdHelper,
     SSLCertificate,
     SSLCertificatesHelper,
@@ -93,6 +94,8 @@ class OpenstackChecksBase(OpenstackBase, plugintools.PluginPartBase):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.ost_projects = OSTProjectCatalog()
+        self.pebble = PebbleHelper(
+                           service_exprs=self.ost_projects.service_exprs)
         self.systemd = SystemdHelper(
                            service_exprs=self.ost_projects.service_exprs)
         self.apt = APTPackageHelper(
diff --git a/hotsos/core/plugins/openstack/openstack.py b/hotsos/core/plugins/openstack/openstack.py
index 0536053c3..7f999894a 100644
--- a/hotsos/core/plugins/openstack/openstack.py
+++ b/hotsos/core/plugins/openstack/openstack.py
@@ -2,6 +2,7 @@
 import os
 
 from hotsos.core import host_helpers
+from hotsos.core.log import log
 from hotsos.core.config import HotSOSConfig
 from hotsos.core.plugins.openstack.exceptions import (
     EXCEPTIONS_COMMON,
@@ -308,6 +309,11 @@ def services_expr(self):
     def services(self):
         exprs = self.services_expr
         info = host_helpers.SystemdHelper(service_exprs=exprs)
+        if not info.services:
+            log.debug("no systemd services found for '%s' - trying pebble",
+                      self.name)
+            info = host_helpers.PebbleHelper(service_exprs=exprs)
+
         return info.services
 
     def log_paths(self, include_deprecated_services=True):
diff --git a/tests/unit/test_host_helpers.py b/tests/unit/test_host_helpers.py
index 8f837505f..4c03e1697 100644
--- a/tests/unit/test_host_helpers.py
+++ b/tests/unit/test_host_helpers.py
@@ -16,6 +16,18 @@
 c-key = 2-8,10-31
 """
 
+PEBBLE_SERVICES = """Service         Startup  Current  Since
+nova-conductor  enabled  backoff  today at 10:25 UTC
+"""
+
+# pylint: disable=C0301
+PEBBLE_PS = """USER         PID %CPU %MEM    VSZ   RSS TTY      STAT START   TIME COMMAND
+root           1  0.0  0.0 717708 10516 ?        Ssl  08:43   0:01 /charm/bin/pebble run --create-dirs --hold --http :38814 --verbose
+root        3048  0.0  0.0   2620   600 pts/0    Ss   10:14   0:00 sh -c bash
+root        3055  0.0  0.0   7372  4036 pts/0    S    10:14   0:00 bash
+root        3225  0.0  0.2  80748 65780 ?        R    10:42   0:00 /usr/bin/python3 /usr/bin/nova-conductor
+"""  # noqa
+
 
 class TestHostNetworkingHelper(utils.BaseTestCase):
 
@@ -309,6 +321,26 @@ def test_systemd_helper(self):
         self.assertEqual(s.summary, expected)
 
 
+class TestPebbleHelper(utils.BaseTestCase):
+
+    @utils.create_data_root({'sos_commands/pebble/pebble_services':
+                             PEBBLE_SERVICES})
+    def test_service_factory(self):
+        svc = getattr(host_helpers.pebble.ServiceFactory(), 'nova-conductor')
+        self.assertEqual(svc.state, 'backoff')
+
+        self.assertEqual(host_helpers.pebble.ServiceFactory().noexist, None)
+
+    @utils.create_data_root({'sos_commands/pebble/pebble_services':
+                             PEBBLE_SERVICES,
+                             'ps': PEBBLE_PS})
+    def test_pebble_helper(self):
+        expected = {'ps': ['nova-conductor (1)'],
+                    'pebble': {'backoff': ['nova-conductor']}}
+        s = host_helpers.pebble.PebbleHelper([r'nova\S+'])
+        self.assertEqual(s.summary, expected)
+
+
 class TestFileStatHelper(utils.BaseTestCase):
 
     @utils.create_data_root({'foo': 'bar'})