diff --git a/README.md b/README.md index d8821f6..c136a58 100644 --- a/README.md +++ b/README.md @@ -235,6 +235,26 @@ service: exporters: [logging] ``` +## Monitor features + +Imalive can also check HTTP endpoints, log the result of each check and export it as metrics (status and duration). + +To enable it, override the `/app/imalive.yml` file with the following content: + +```yaml +--- +monitors: + - type: http + name: imalive + url: http://localhost:8081 + method: GET # optional (GET by default, only POST and GET are supported) + expected_http_code: 200 # optional (200 by default) + expected_contain: "\"status\":\"ok\"" # optional (the response body is not checked if not present) + timeout: 30 # optional (30 seconds if not present) + username: changeit # optional (no basic auth if not present) + password: changeit # optional (no basic auth if not present) +``` + ## Development / contributions Go see this [documentation](./CONTRIBUTING.md) diff --git a/VERSION b/VERSION index 4f2c1d1..fcdb2e1 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.6.6 +4.0.0 diff --git a/imalive.yml b/imalive.yml new file mode 100644 index 0000000..36f972d --- /dev/null +++ b/imalive.yml @@ -0,0 +1,2 @@ +--- +monitors: {} diff --git a/requirements.txt b/requirements.txt index 9ff2729..fade6ed 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,3 +9,4 @@ opentelemetry-api opentelemetry-sdk opentelemetry-instrumentation-fastapi opentelemetry-exporter-otlp +pyyaml diff --git a/src/main.py b/src/main.py index e6aecd7..263224f 100644 --- a/src/main.py +++ b/src/main.py @@ -13,6 +13,7 @@ from utils.cid import get_current_cid from utils.manifests import get_manifest_as_dict from utils.heartbit import heartbit +from utils.monitor import monitors from utils.otel import init_otel_tracer, init_otel_metrics, init_otel_logger version = "unkown" @@ -41,6 +42,7 @@ init_otel_logger() heartbit() +monitors() instrumentator.instrument(app, metric_namespace='imalive', metric_subsystem='imalive') 
instrumentator.expose(app, endpoint='/v1/prom') diff --git a/src/utils/common.py b/src/utils/common.py index 14d114c..d79a66b 100644 --- a/src/utils/common.py +++ b/src/utils/common.py @@ -29,6 +29,13 @@ def is_empty_key(vdict, key): def is_not_empty_key(vdict, key): return not is_empty_key(vdict, key) +def remove_key_safely(vdict, key): + if is_not_empty_key(vdict, key): + del vdict[key] + +def get_or_else(vdict, key, default): + return default if is_empty_key(vdict, key) else vdict[key] + def is_numeric (var): if (isinstance(var, int)): return True diff --git a/src/utils/metrics.py b/src/utils/metrics.py index 0760ca9..fc61ecb 100644 --- a/src/utils/metrics.py +++ b/src/utils/metrics.py @@ -63,6 +63,7 @@ def all_metrics(): vdate = datetime.now() return { "status": "ok", + "type": "heartbit", 'name': os.environ['IMALIVE_NODE_NAME'], 'time': vdate.isoformat(), "disk_usage": disk_usage(), diff --git a/src/utils/monitor.py b/src/utils/monitor.py new file mode 100644 index 0000000..bb24570 --- /dev/null +++ b/src/utils/monitor.py @@ -0,0 +1,152 @@ +import os +import yaml +import requests +import asyncio +import threading + +import requests +import yaml + +from datetime import datetime +from time import sleep +from requests.auth import HTTPBasicAuth + +from utils.common import is_empty_key, get_or_else, is_not_empty, remove_key_safely +from utils.gauge import create_gauge, set_gauge +from utils.heartbit import WAIT_TIME +from utils.logger import log_msg +from utils.otel import get_otel_tracer + +def check_http_monitor(monitor, gauges): + vdate = datetime.now() + + if monitor['type'] != 'http': + log_msg("DEBUG", { + "status": "ok", + "type": "monitor", + "time": vdate.isoformat(), + "message": "Not an http monitor", + "monitor": monitor + }) + set_gauge(gauges['result'], 0) + return + + if is_empty_key(monitor, 'url'): + log_msg("ERROR", { + "status": "ko", + "type": "monitor", + "time": vdate.isoformat(), + "message": "Missing mandatory url", + "monitor": monitor + }) 
+ set_gauge(gauges['result'], 0) + return + + method = get_or_else(monitor, 'method', 'GET') + timeout = get_or_else(monitor, 'timeout', 30) + expected_http_code = get_or_else(monitor, 'expected_http_code', 200) + expected_contain = get_or_else(monitor, 'expected_contain', None) + username = get_or_else(monitor, 'username', None) + password = get_or_else(monitor, 'password', None) + remove_key_safely(monitor, 'password') + + auth = None + duration = None + if is_not_empty(username) and is_not_empty(password): + auth = HTTPBasicAuth(username, password) + + try: + if method == "GET": + response = requests.get(monitor['url'], timeout=timeout, auth=auth) + duration = response.elapsed.total_seconds() + set_gauge(gauges['duration'], duration) + elif method == "POST": + response = requests.post(monitor['url'], timeout=timeout, auth=auth) + duration = response.elapsed.total_seconds() + set_gauge(gauges['duration'], duration) + else: + log_msg("ERROR", { + "status": "ko", + "type": "monitor", + "time": vdate.isoformat(), + "message": "Not supported http method", + "monitor": monitor + }) + set_gauge(gauges['result'], 0) + return + + if response.status_code != expected_http_code: + log_msg("ERROR", { + "status": "ko", + "type": "monitor", + "time": vdate.isoformat(), + "duration": duration, + "message": "Not supported http method", + "monitor": monitor + }) + set_gauge(gauges['result'], 0) + return + + if is_not_empty(expected_contain) and expected_contain not in response.text: + log_msg("ERROR", { + "status": "ko", + "type": "monitor", + "time": vdate.isoformat(), + "duration": duration, + "message": "Response not contain {}".format(expected_contain), + "monitor": monitor + }) + set_gauge(gauges['result'], 0) + return + + set_gauge(gauges['result'], 1) + log_msg("INFO", { + "status": "ok", + "type": "monitor", + "time": vdate.isoformat(), + "duration": duration, + "message": "Monitor is healthy", + "monitor": monitor + }) + + except Exception as e: + 
set_gauge(gauges['result'], 0) + log_msg("ERROR", { + "status": "ko", + "type": "monitor", + "time": vdate.isoformat(), + "message": "Unexpected error", + "error": "{}".format(e), + "monitor": monitor + }) + +gauges = {} +def monitors(): + def loop_monitors(): + config_path = os.path.realpath(os.path.join(os.path.dirname(__file__), '..', '..', 'imalive.yml')) + with open(config_path, "r") as stream: + loaded_data = yaml.safe_load(stream) + for monitor in loaded_data['monitors']: + if is_empty_key(monitor, 'name'): + continue + + gauges[monitor['name']] = { + 'result': create_gauge("monitor_{}_result".format(monitor['name']), "monitor {} result".format(monitor['name'])), + 'duration': create_gauge("monitor_{}_duration".format(monitor['name']), "monitor {} duration".format(monitor['name'])) + } + + while True: + with get_otel_tracer().start_as_current_span("imalive-monitors"): + for monitor in loaded_data['monitors']: + if is_empty_key(monitor, 'name'): + continue + check_http_monitor(monitor, gauges[monitor['name']]) + sleep(WAIT_TIME) + + def start_monitors(): + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + loop.run_until_complete(loop_monitors()) + + async_thread = threading.Thread(target=start_monitors, daemon=True) + async_thread.start()