From 8084ad2a329f9f2535966bd34796c92b6f4d3ed4 Mon Sep 17 00:00:00 2001 From: Fotis Voutsas Date: Wed, 24 May 2023 17:28:50 +0300 Subject: [PATCH 01/23] Create category overview pages for learn's restructure (#15091) Co-authored-by: Ilya Mashchenko --- .../accessing-netdata-dashboards.md | 3 +++ .../build-the-netdata-agent-yourself.md | 3 +++ .../install-netdata-on-embedded-systems.md | 3 +++ .../install-with-a-cicd-provisioning-system.md | 3 +++ ...ne-learning-and-assisted-troubleshooting.md | 3 +++ ...maintenance-operations-on-netdata-agents.md | 3 +++ .../metrics-streaming-and-replication.md | 3 +++ docs/category-overview-pages/misc-overview.md | 18 +----------------- .../monitor-your-infrastructure.md | 3 +++ docs/category-overview-pages/netdata-apis.md | 5 +++++ .../netdata-architecture.md | 3 +++ .../netdata-dashboards-and-visualizations.md | 3 +++ .../optimizing-metrics-database.md | 3 +++ 13 files changed, 39 insertions(+), 17 deletions(-) create mode 100644 docs/category-overview-pages/accessing-netdata-dashboards.md create mode 100644 docs/category-overview-pages/build-the-netdata-agent-yourself.md create mode 100644 docs/category-overview-pages/install-netdata-on-embedded-systems.md create mode 100644 docs/category-overview-pages/install-with-a-cicd-provisioning-system.md create mode 100644 docs/category-overview-pages/machine-learning-and-assisted-troubleshooting.md create mode 100644 docs/category-overview-pages/maintenance-operations-on-netdata-agents.md create mode 100644 docs/category-overview-pages/metrics-streaming-and-replication.md create mode 100644 docs/category-overview-pages/monitor-your-infrastructure.md create mode 100644 docs/category-overview-pages/netdata-apis.md create mode 100644 docs/category-overview-pages/netdata-architecture.md create mode 100644 docs/category-overview-pages/netdata-dashboards-and-visualizations.md create mode 100644 docs/category-overview-pages/optimizing-metrics-database.md diff --git 
a/docs/category-overview-pages/accessing-netdata-dashboards.md b/docs/category-overview-pages/accessing-netdata-dashboards.md new file mode 100644 index 00000000000000..46c0bcff172277 --- /dev/null +++ b/docs/category-overview-pages/accessing-netdata-dashboards.md @@ -0,0 +1,3 @@ +# Accessing Netdata Dashboards + +This section contains documentation on how you can access the Netdata Agent's dashboards, and the Netdata Cloud's dashboards. \ No newline at end of file diff --git a/docs/category-overview-pages/build-the-netdata-agent-yourself.md b/docs/category-overview-pages/build-the-netdata-agent-yourself.md new file mode 100644 index 00000000000000..99166ad95137bf --- /dev/null +++ b/docs/category-overview-pages/build-the-netdata-agent-yourself.md @@ -0,0 +1,3 @@ +# Build the Netdata Agent yourself + +This section contains documentation on all the ways that you can build the Netdata Agent. \ No newline at end of file diff --git a/docs/category-overview-pages/install-netdata-on-embedded-systems.md b/docs/category-overview-pages/install-netdata-on-embedded-systems.md new file mode 100644 index 00000000000000..dfaa4482c2aae9 --- /dev/null +++ b/docs/category-overview-pages/install-netdata-on-embedded-systems.md @@ -0,0 +1,3 @@ +# Install Netdata on Embedded Systems Overview + +This section contains documentation for installation methods when it comes to Embedded Systems. \ No newline at end of file diff --git a/docs/category-overview-pages/install-with-a-cicd-provisioning-system.md b/docs/category-overview-pages/install-with-a-cicd-provisioning-system.md new file mode 100644 index 00000000000000..30a5a706cd86af --- /dev/null +++ b/docs/category-overview-pages/install-with-a-cicd-provisioning-system.md @@ -0,0 +1,3 @@ +# Install with a CI/CD Provisioning System Overview + +This section contains documentation on all the installation methods through a CI/CD system. 
\ No newline at end of file diff --git a/docs/category-overview-pages/machine-learning-and-assisted-troubleshooting.md b/docs/category-overview-pages/machine-learning-and-assisted-troubleshooting.md new file mode 100644 index 00000000000000..074051e3ecdd6b --- /dev/null +++ b/docs/category-overview-pages/machine-learning-and-assisted-troubleshooting.md @@ -0,0 +1,3 @@ +# Machine Learning and Assisted Troubleshooting Overview + +This section contains documentation regarding Netdata's troubleshooting and machine learning features. \ No newline at end of file diff --git a/docs/category-overview-pages/maintenance-operations-on-netdata-agents.md b/docs/category-overview-pages/maintenance-operations-on-netdata-agents.md new file mode 100644 index 00000000000000..207a0bd327fef1 --- /dev/null +++ b/docs/category-overview-pages/maintenance-operations-on-netdata-agents.md @@ -0,0 +1,3 @@ +# Maintenance operations on Netdata Agents Overview + +This section provides information on various actions you can take when maintaining a Netdata Agent. \ No newline at end of file diff --git a/docs/category-overview-pages/metrics-streaming-and-replication.md b/docs/category-overview-pages/metrics-streaming-and-replication.md new file mode 100644 index 00000000000000..37b040e9ed6ddb --- /dev/null +++ b/docs/category-overview-pages/metrics-streaming-and-replication.md @@ -0,0 +1,3 @@ +# Metrics Streaming and Replication Overview + +This section contains documentation to help you understand and configure streaming and replication with Netdata. 
\ No newline at end of file diff --git a/docs/category-overview-pages/misc-overview.md b/docs/category-overview-pages/misc-overview.md index e0c1cc0d1902b1..dbb11e9bc8e56c 100644 --- a/docs/category-overview-pages/misc-overview.md +++ b/docs/category-overview-pages/misc-overview.md @@ -1,19 +1,3 @@ - - # Miscellaneous material -This section contains temporary material that no longer belongs in our official documentation, and will -be moved to other locations. We keep it here to make it accessible while we create the new articles. - - - - - +This section contains material that will be moved to new locations as we see fit. We keep it here to make it accessible while we make these changes. \ No newline at end of file diff --git a/docs/category-overview-pages/monitor-your-infrastructure.md b/docs/category-overview-pages/monitor-your-infrastructure.md new file mode 100644 index 00000000000000..3582e88a68fc06 --- /dev/null +++ b/docs/category-overview-pages/monitor-your-infrastructure.md @@ -0,0 +1,3 @@ +# Monitor your Infrastructure Overview + +This section contains documentation on how you can use Netdata Cloud and its features to monitor your entire infrastructure. \ No newline at end of file diff --git a/docs/category-overview-pages/netdata-apis.md b/docs/category-overview-pages/netdata-apis.md new file mode 100644 index 00000000000000..82d1c1752eea75 --- /dev/null +++ b/docs/category-overview-pages/netdata-apis.md @@ -0,0 +1,5 @@ +# Netdata APIs Overview + +This section contains information about Netdata's APIs. + +You can access the Netdata Agent's API through swagger UI [here](/api). 
\ No newline at end of file diff --git a/docs/category-overview-pages/netdata-architecture.md b/docs/category-overview-pages/netdata-architecture.md new file mode 100644 index 00000000000000..70f12659773df5 --- /dev/null +++ b/docs/category-overview-pages/netdata-architecture.md @@ -0,0 +1,3 @@ +# Netdata Architecture Overview + +This section's purpose is to explain the architecture of Netdata, the role of the Agent and the Cloud, and more. \ No newline at end of file diff --git a/docs/category-overview-pages/netdata-dashboards-and-visualizations.md b/docs/category-overview-pages/netdata-dashboards-and-visualizations.md new file mode 100644 index 00000000000000..cc9304365f1a45 --- /dev/null +++ b/docs/category-overview-pages/netdata-dashboards-and-visualizations.md @@ -0,0 +1,3 @@ +# Netdata Dashboards and Visualizations Overview + +This section provides documentation about all the visualization operations, features and insights that Netdata provides. \ No newline at end of file diff --git a/docs/category-overview-pages/optimizing-metrics-database.md b/docs/category-overview-pages/optimizing-metrics-database.md new file mode 100644 index 00000000000000..fdbd3b690cc613 --- /dev/null +++ b/docs/category-overview-pages/optimizing-metrics-database.md @@ -0,0 +1,3 @@ +# Optimizing Metrics Database Overview + +This section contains documentation to help you understand how the metrics DB works, understand the key features and configure them to suit your needs. 
\ No newline at end of file From 51e1185dc5adf4d40bc8557abca9156ced741fb5 Mon Sep 17 00:00:00 2001 From: thiagoftsm Date: Wed, 24 May 2023 14:45:12 +0000 Subject: [PATCH 02/23] Address issue with Thanos Receiver (#15094) * fix_label_order: fix label order to export data for Thanos * fix_label_order: Split chart and family inside generate_as_collected_prom_metric * fix_label_order: Unify calls to buffer_sprintf and fix typo --- exporting/prometheus/prometheus.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/exporting/prometheus/prometheus.c b/exporting/prometheus/prometheus.c index 24bd215f4068d6..7a1112abdd5bc3 100644 --- a/exporting/prometheus/prometheus.c +++ b/exporting/prometheus/prometheus.c @@ -470,12 +470,12 @@ static void generate_as_collected_prom_metric(BUFFER *wb, struct gen_parameters if (!homogeneous) buffer_sprintf(wb, "_%s", p->dimension); - buffer_sprintf(wb, "%s{chart=\"%s\",family=\"%s\"", p->suffix, p->chart, p->family); + buffer_sprintf(wb, "%s{chart=\"%s\"", p->suffix, p->chart); if (homogeneous) buffer_sprintf(wb, ",dimension=\"%s\"", p->dimension); - buffer_sprintf(wb, "%s} ", p->labels); + buffer_sprintf(wb, ",family=\"%s\"%s} ", p->family, p->labels); if (prometheus_collector) buffer_sprintf( @@ -713,30 +713,30 @@ static void rrd_stats_api_v1_charts_allmetrics_prometheus( if (output_options & PROMETHEUS_OUTPUT_TIMESTAMPS) buffer_sprintf( wb, - "%s_%s%s%s{chart=\"%s\",family=\"%s\",dimension=\"%s\"%s} " NETDATA_DOUBLE_FORMAT + "%s_%s%s%s{chart=\"%s\",dimension=\"%s\",family=\"%s\"%s} " NETDATA_DOUBLE_FORMAT " %llu\n", prefix, context, units, suffix, chart, - family, dimension, + family, labels, value, last_time * MSEC_PER_SEC); else buffer_sprintf( wb, - "%s_%s%s%s{chart=\"%s\",family=\"%s\",dimension=\"%s\"%s} " NETDATA_DOUBLE_FORMAT + "%s_%s%s%s{chart=\"%s\",dimension=\"%s\",family=\"%s\"%s} " NETDATA_DOUBLE_FORMAT "\n", prefix, context, units, suffix, chart, - family, dimension, + family, labels, value); } 
From 7e1e90cce4ab1dd8a49af086c32f108e435e6bfa Mon Sep 17 00:00:00 2001 From: Ilya Mashchenko Date: Wed, 24 May 2023 17:57:36 +0300 Subject: [PATCH 03/23] fix cockroachdb alarms (#15095) --- health/health.d/cockroachdb.conf | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/health/health.d/cockroachdb.conf b/health/health.d/cockroachdb.conf index 1f227841e6556b..09e4f9d40857b6 100644 --- a/health/health.d/cockroachdb.conf +++ b/health/health.d/cockroachdb.conf @@ -6,7 +6,7 @@ class: Utilization type: Database component: CockroachDB - calc: $capacity_used_percent + calc: $total units: % every: 10s warn: $this > (($status >= $WARNING) ? (80) : (85)) @@ -20,7 +20,7 @@ component: CockroachDB class: Utilization type: Database component: CockroachDB - calc: $capacity_usable_used_percent + calc: $usable units: % every: 10s warn: $this > (($status >= $WARNING) ? (80) : (85)) @@ -36,7 +36,7 @@ component: CockroachDB class: Errors type: Database component: CockroachDB - calc: $ranges_unavailable + calc: $unavailable units: num every: 10s warn: $this > 0 @@ -49,7 +49,7 @@ component: CockroachDB class: Errors type: Database component: CockroachDB - calc: $ranges_underreplicated + calc: $under_replicated units: num every: 10s warn: $this > 0 @@ -64,7 +64,7 @@ component: CockroachDB class: Utilization type: Database component: CockroachDB - calc: $sys_fd_open/$sys_fd_softlimit * 100 + calc: $open/$sys_fd_softlimit * 100 units: % every: 10s warn: $this > 80 From db535e9fd82d09d5101804ecb6a3e2a9208aacf0 Mon Sep 17 00:00:00 2001 From: thiagoftsm Date: Wed, 24 May 2023 16:38:01 +0000 Subject: [PATCH 04/23] New eBPF option (#14691) --- collectors/ebpf.plugin/README.md | 44 +++-- collectors/ebpf.plugin/ebpf.c | 109 +++++++---- collectors/ebpf.plugin/ebpf.d.conf | 5 + collectors/ebpf.plugin/ebpf.d/cachestat.conf | 3 + collectors/ebpf.plugin/ebpf.d/dcstat.conf | 3 + collectors/ebpf.plugin/ebpf.d/fd.conf | 3 + collectors/ebpf.plugin/ebpf.d/network.conf | 4 + 
collectors/ebpf.plugin/ebpf.d/process.conf | 7 +- collectors/ebpf.plugin/ebpf.d/shm.conf | 3 + collectors/ebpf.plugin/ebpf.d/swap.conf | 3 + collectors/ebpf.plugin/ebpf.d/sync.conf | 4 + collectors/ebpf.plugin/ebpf.d/vfs.conf | 13 ++ collectors/ebpf.plugin/ebpf.h | 1 + collectors/ebpf.plugin/ebpf_apps.c | 37 +++- collectors/ebpf.plugin/ebpf_apps.h | 4 +- collectors/ebpf.plugin/ebpf_cachestat.c | 92 +++++++--- collectors/ebpf.plugin/ebpf_dcstat.c | 93 +++++++--- collectors/ebpf.plugin/ebpf_disk.c | 34 +++- collectors/ebpf.plugin/ebpf_disk.h | 3 +- collectors/ebpf.plugin/ebpf_fd.c | 81 ++++++--- collectors/ebpf.plugin/ebpf_filesystem.c | 182 +++++++++++++++---- collectors/ebpf.plugin/ebpf_filesystem.h | 10 + collectors/ebpf.plugin/ebpf_hardirq.c | 18 +- collectors/ebpf.plugin/ebpf_mdflush.c | 23 ++- collectors/ebpf.plugin/ebpf_mount.c | 32 +++- collectors/ebpf.plugin/ebpf_oomkill.c | 15 +- collectors/ebpf.plugin/ebpf_process.c | 92 ++++++++-- collectors/ebpf.plugin/ebpf_process.h | 7 + collectors/ebpf.plugin/ebpf_shm.c | 74 ++++++-- collectors/ebpf.plugin/ebpf_socket.c | 132 +++++++++++--- collectors/ebpf.plugin/ebpf_softirq.c | 32 +++- collectors/ebpf.plugin/ebpf_swap.c | 79 ++++++-- collectors/ebpf.plugin/ebpf_sync.c | 172 ++++++++++++++---- collectors/ebpf.plugin/ebpf_vfs.c | 89 ++++++--- libnetdata/ebpf/ebpf.c | 117 ++++++++++-- libnetdata/ebpf/ebpf.h | 14 ++ 36 files changed, 1296 insertions(+), 338 deletions(-) diff --git a/collectors/ebpf.plugin/README.md b/collectors/ebpf.plugin/README.md index 75f44a6e5c4674..94bbc184db4025 100644 --- a/collectors/ebpf.plugin/README.md +++ b/collectors/ebpf.plugin/README.md @@ -99,8 +99,6 @@ accepts the following values: - `return`: In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates new charts for the return of these functions, such as errors. 
Monitoring function returns can help in debugging software, such as failing to close file descriptors or creating zombie processes. -- `update every`: Number of seconds used for eBPF to send data for Netdata. -- `pid table size`: Defines the maximum number of PIDs stored inside the application hash table. #### Integration with `apps.plugin` @@ -115,11 +113,6 @@ If you want to enable `apps.plugin` integration, change the "apps" setting to "y apps = yes ``` -When the integration is enabled, eBPF collector allocates memory for each process running. The total allocated memory -has direct relationship with the kernel version. When the eBPF plugin is running on kernels newer than `4.15`, it uses -per-cpu maps to speed up the update of hash tables. This also implies storing data for the same PID for each processor -it runs. - #### Integration with `cgroups.plugin` The eBPF collector also creates charts for each cgroup through an integration with the @@ -138,6 +131,13 @@ If you do not need to monitor specific metrics for your `cgroups`, you can enabl `ebpf.d.conf`, and then disable the plugin for a specific `thread` by following the steps in the [Configuration](#configuring-ebpfplugin) section. +#### Maps per Core + +When Netdata is running on kernels newer than `4.6`, users can modify how the `ebpf.plugin` creates maps (hash or +array). When `maps per core` is set to `yes`, the plugin creates a map per core on the host. On the other hand, +when the value is set to `no`, only one hash table is created; this option uses less memory, but it can also +increase overhead for processes. 
+ #### Collect PID When one of the previous integrations is enabled, `ebpf.plugin` will use Process Identifier (`PID`) to identify the @@ -157,6 +157,16 @@ The threads that have integration with other collectors have an internal clean u will only enable these threads integrated with other collectors when the kernel is compiled with `CONFIG_DEBUG_INFO_BTF`, unless you enable them manually. +#### Collection period + +The plugin uses the option `update every` to define the number of seconds used for eBPF to send data to Netdata. The default value +is 5 seconds. + +#### PID table size + +The option `pid table size` defines the maximum number of PIDs stored inside the application hash table. The default value +is defined according to the [kernel](https://elixir.bootlin.com/linux/v6.0.19/source/include/linux/threads.h#L28) source code. + #### Integration Dashboard Elements When an integration is enabled, your dashboard will also show the following cgroups and apps charts using low-level @@ -880,14 +890,24 @@ These are tracepoints related to [OOM](https://en.wikipedia.org/wiki/Out_of_memo eBPF monitoring is complex and produces a large volume of metrics. We've discovered scenarios where the eBPF plugin significantly increases kernel memory usage by several hundred MB. -If your node is experiencing high memory usage and there is no obvious culprit to be found in the `apps.mem` chart, -consider testing for high kernel memory usage by [disabling eBPF monitoring](#configuring-ebpfplugin). Next, -[restart Netdata](https://github.com/netdata/netdata/blob/master/docs/configure/start-stop-restart.md) with `sudo systemctl restart netdata` to see if system memory -usage (see the `system.ram` chart) has dropped significantly. +When the integration with apps or cgroup is enabled, the eBPF collector allocates memory for each process running. 
If your +node is experiencing high memory usage and there is no obvious culprit to be found in the `apps.mem` chart, consider: + +- Modify [maps per core](#maps-per-core) to use only one map. +- Disable [integration with apps](#integration-with-appsplugin). +- Disable [integration with cgroup](#integration-with-cgroupsplugin). -Beginning with `v1.31`, kernel memory usage is configurable via the [`pid table size` setting](#ebpf-load-mode) +If, after these changes, you still suspect that eBPF is using too much memory, and there is no obvious culprit to be found +in the `apps.mem` chart, consider testing for high kernel memory usage by [disabling eBPF monitoring](#configuring-ebpfplugin). +Next, [restart Netdata](https://github.com/netdata/netdata/blob/master/docs/configure/start-stop-restart.md) with +`sudo systemctl restart netdata` to see if system memory usage (see the `system.ram` chart) has dropped significantly. + +Beginning with `v1.31`, kernel memory usage is configurable via the [`pid table size` setting](#pid-table-size) in `ebpf.conf`. +The total memory usage is a well-known [issue](https://lore.kernel.org/all/167821082315.1693.6957546778534183486.git-patchwork-notify@kernel.org/) +for eBPF; it is not a bug in the plugin. 
+ ### SELinux When [SELinux](https://www.redhat.com/en/topics/linux/what-is-selinux) is enabled, it may prevent `ebpf.plugin` from diff --git a/collectors/ebpf.plugin/ebpf.c b/collectors/ebpf.plugin/ebpf.c index c0764c60005ad2..ea53a3ba59344d 100644 --- a/collectors/ebpf.plugin/ebpf.c +++ b/collectors/ebpf.plugin/ebpf.c @@ -54,7 +54,8 @@ ebpf_module_t ebpf_modules[] = { .config_file = NETDATA_PROCESS_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_10 | NETDATA_V5_14, - .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL, .thread = NULL}, + .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, { .thread_name = "socket", .config_name = "socket", .enabled = 0, .start_routine = ebpf_socket_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, @@ -62,7 +63,8 @@ ebpf_module_t ebpf_modules[] = { .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &socket_config, .config_file = NETDATA_NETWORK_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, - .load = EBPF_LOAD_LEGACY, .targets = socket_targets, .probe_links = NULL, .objects = NULL, .thread = NULL}, + .load = EBPF_LOAD_LEGACY, .targets = socket_targets, .probe_links = NULL, .objects = NULL, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, { .thread_name = "cachestat", .config_name = "cachestat", .enabled = 0, .start_routine = ebpf_cachestat_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, @@ -71,7 +73,8 @@ 
ebpf_module_t ebpf_modules[] = { .config_file = NETDATA_CACHESTAT_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18| NETDATA_V5_4 | NETDATA_V5_14 | NETDATA_V5_15 | NETDATA_V5_16, - .load = EBPF_LOAD_LEGACY, .targets = cachestat_targets, .probe_links = NULL, .objects = NULL, .thread = NULL}, + .load = EBPF_LOAD_LEGACY, .targets = cachestat_targets, .probe_links = NULL, .objects = NULL, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, { .thread_name = "sync", .config_name = "sync", .enabled = 0, .start_routine = ebpf_sync_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, @@ -79,7 +82,8 @@ ebpf_module_t ebpf_modules[] = { .config_file = NETDATA_SYNC_CONFIG_FILE, // All syscalls have the same kernels .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, - .load = EBPF_LOAD_LEGACY, .targets = sync_targets, .probe_links = NULL, .objects = NULL, .thread = NULL}, + .load = EBPF_LOAD_LEGACY, .targets = sync_targets, .probe_links = NULL, .objects = NULL, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, { .thread_name = "dc", .config_name = "dc", .enabled = 0, .start_routine = ebpf_dcstat_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, @@ -87,7 +91,8 @@ ebpf_module_t ebpf_modules[] = { .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &dcstat_config, .config_file = NETDATA_DIRECTORY_DCSTAT_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, - .load = EBPF_LOAD_LEGACY, .targets = dc_targets, .probe_links = NULL, .objects = NULL, .thread = NULL}, + .load 
= EBPF_LOAD_LEGACY, .targets = dc_targets, .probe_links = NULL, .objects = NULL, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, { .thread_name = "swap", .config_name = "swap", .enabled = 0, .start_routine = ebpf_swap_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, @@ -95,7 +100,8 @@ ebpf_module_t ebpf_modules[] = { .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &swap_config, .config_file = NETDATA_DIRECTORY_SWAP_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, - .load = EBPF_LOAD_LEGACY, .targets = swap_targets, .probe_links = NULL, .objects = NULL, .thread = NULL}, + .load = EBPF_LOAD_LEGACY, .targets = swap_targets, .probe_links = NULL, .objects = NULL, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, { .thread_name = "vfs", .config_name = "vfs", .enabled = 0, .start_routine = ebpf_vfs_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, @@ -103,28 +109,32 @@ ebpf_module_t ebpf_modules[] = { .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &vfs_config, .config_file = NETDATA_DIRECTORY_VFS_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, - .load = EBPF_LOAD_LEGACY, .targets = vfs_targets, .probe_links = NULL, .objects = NULL, .thread = NULL}, + .load = EBPF_LOAD_LEGACY, .targets = vfs_targets, .probe_links = NULL, .objects = NULL, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, { .thread_name = "filesystem", .config_name = "filesystem", .enabled = 0, .start_routine = ebpf_filesystem_thread, .update_every = 
EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &fs_config, .config_file = NETDATA_FILESYSTEM_CONFIG_FILE, //We are setting kernels as zero, because we load eBPF programs according the kernel running. - .kernels = 0, .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL, .thread = NULL }, + .kernels = 0, .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES }, { .thread_name = "disk", .config_name = "disk", .enabled = 0, .start_routine = ebpf_disk_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &disk_config, .config_file = NETDATA_DISK_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, - .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL, .thread = NULL}, + .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, { .thread_name = "mount", .config_name = "mount", .enabled = 0, .start_routine = ebpf_mount_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &mount_config, .config_file = NETDATA_MOUNT_CONFIG_FILE, 
.kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, - .load = EBPF_LOAD_LEGACY, .targets = mount_targets, .probe_links = NULL, .objects = NULL, .thread = NULL}, + .load = EBPF_LOAD_LEGACY, .targets = mount_targets, .probe_links = NULL, .objects = NULL, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, { .thread_name = "fd", .config_name = "fd", .enabled = 0, .start_routine = ebpf_fd_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, @@ -133,21 +143,24 @@ ebpf_module_t ebpf_modules[] = { .config_file = NETDATA_FD_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_11 | NETDATA_V5_14, - .load = EBPF_LOAD_LEGACY, .targets = fd_targets, .probe_links = NULL, .objects = NULL, .thread = NULL}, + .load = EBPF_LOAD_LEGACY, .targets = fd_targets, .probe_links = NULL, .objects = NULL, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, { .thread_name = "hardirq", .config_name = "hardirq", .enabled = 0, .start_routine = ebpf_hardirq_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &hardirq_config, .config_file = NETDATA_HARDIRQ_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, - .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL, .thread = NULL}, + .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, { .thread_name = "softirq", 
.config_name = "softirq", .enabled = 0, .start_routine = ebpf_softirq_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &softirq_config, .config_file = NETDATA_SOFTIRQ_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, - .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL, .thread = NULL}, + .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, { .thread_name = "oomkill", .config_name = "oomkill", .enabled = 0, .start_routine = ebpf_oomkill_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, @@ -155,7 +168,8 @@ ebpf_module_t ebpf_modules[] = { .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &oomkill_config, .config_file = NETDATA_OOMKILL_CONFIG_FILE, .kernels = NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, - .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL, .thread = NULL}, + .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, { .thread_name = "shm", .config_name = "shm", .enabled = 0, .start_routine = ebpf_shm_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, @@ -163,19 +177,21 @@ ebpf_module_t ebpf_modules[] = { 
.pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &shm_config, .config_file = NETDATA_DIRECTORY_SHM_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, - .load = EBPF_LOAD_LEGACY, .targets = shm_targets, .probe_links = NULL, .objects = NULL, .thread = NULL}, + .load = EBPF_LOAD_LEGACY, .targets = shm_targets, .probe_links = NULL, .objects = NULL, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, { .thread_name = "mdflush", .config_name = "mdflush", .enabled = 0, .start_routine = ebpf_mdflush_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &mdflush_config, .config_file = NETDATA_DIRECTORY_MDFLUSH_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, - .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL, .thread = NULL}, + .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, { .thread_name = NULL, .enabled = 0, .start_routine = NULL, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 0, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = 0, .names = NULL, .cfg = NULL, .config_name = NULL, .kernels = 0, .load = EBPF_LOAD_LEGACY, - .targets = NULL, .probe_links = NULL, .objects = NULL, .thread = NULL}, + .targets = NULL, .probe_links = NULL, .objects = NULL, .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, }; struct netdata_static_thread ebpf_threads[] = { @@ -360,7 +376,8 @@ 
ebpf_filesystem_partitions_t localfs[] = .flags = NETDATA_FILESYSTEM_FLAG_NO_PARTITION, .enabled = CONFIG_BOOLEAN_YES, .addresses = {.function = NULL, .addr = 0}, - .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4}, + .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4, + .fs_maps = NULL}, {.filesystem = "xfs", .optional_filesystem = NULL, .family = "xfs", @@ -369,7 +386,8 @@ ebpf_filesystem_partitions_t localfs[] = .flags = NETDATA_FILESYSTEM_FLAG_NO_PARTITION, .enabled = CONFIG_BOOLEAN_YES, .addresses = {.function = NULL, .addr = 0}, - .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4}, + .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4, + .fs_maps = NULL}, {.filesystem = "nfs", .optional_filesystem = "nfs4", .family = "nfs", @@ -378,7 +396,8 @@ ebpf_filesystem_partitions_t localfs[] = .flags = NETDATA_FILESYSTEM_ATTR_CHARTS, .enabled = CONFIG_BOOLEAN_YES, .addresses = {.function = NULL, .addr = 0}, - .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4}, + .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4, + .fs_maps = NULL}, {.filesystem = "zfs", .optional_filesystem = NULL, .family = "zfs", @@ -387,7 +406,8 @@ ebpf_filesystem_partitions_t localfs[] = .flags = NETDATA_FILESYSTEM_FLAG_NO_PARTITION, .enabled = CONFIG_BOOLEAN_YES, .addresses = {.function = NULL, .addr = 0}, - .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4}, + .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4, + .fs_maps = NULL}, {.filesystem = "btrfs", .optional_filesystem = NULL, .family = "btrfs", @@ -396,7 +416,8 @@ ebpf_filesystem_partitions_t localfs[] = .flags = NETDATA_FILESYSTEM_FILL_ADDRESS_TABLE, .enabled = CONFIG_BOOLEAN_YES, .addresses = {.function = "btrfs_file_operations", .addr = 0}, 
- .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_10}, + .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_10, + .fs_maps = NULL}, {.filesystem = NULL, .optional_filesystem = NULL, .family = NULL, @@ -405,43 +426,50 @@ ebpf_filesystem_partitions_t localfs[] = .flags = NETDATA_FILESYSTEM_FLAG_NO_PARTITION, .enabled = CONFIG_BOOLEAN_YES, .addresses = {.function = NULL, .addr = 0}, - .kernels = 0}}; + .kernels = 0, .fs_maps = NULL}}; ebpf_sync_syscalls_t local_syscalls[] = { {.syscall = NETDATA_SYSCALLS_SYNC, .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL, #ifdef LIBBPF_MAJOR_VERSION - .sync_obj = NULL + .sync_obj = NULL, #endif + .sync_maps = NULL }, {.syscall = NETDATA_SYSCALLS_SYNCFS, .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL, #ifdef LIBBPF_MAJOR_VERSION - .sync_obj = NULL + .sync_obj = NULL, #endif + .sync_maps = NULL }, {.syscall = NETDATA_SYSCALLS_MSYNC, .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL, #ifdef LIBBPF_MAJOR_VERSION - .sync_obj = NULL + .sync_obj = NULL, #endif + .sync_maps = NULL }, {.syscall = NETDATA_SYSCALLS_FSYNC, .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL, #ifdef LIBBPF_MAJOR_VERSION - .sync_obj = NULL + .sync_obj = NULL, #endif + .sync_maps = NULL }, {.syscall = NETDATA_SYSCALLS_FDATASYNC, .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL, #ifdef LIBBPF_MAJOR_VERSION - .sync_obj = NULL + .sync_obj = NULL, #endif + .sync_maps = NULL }, {.syscall = NETDATA_SYSCALLS_SYNC_FILE_RANGE, .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL, #ifdef LIBBPF_MAJOR_VERSION - .sync_obj = NULL + .sync_obj = NULL, #endif + .sync_maps = NULL }, {.syscall = NULL, .enabled = CONFIG_BOOLEAN_NO, .objects = NULL, .probe_links = NULL, #ifdef LIBBPF_MAJOR_VERSION - .sync_obj = NULL + .sync_obj = NULL, #endif + .sync_maps = NULL } }; @@ 
-1737,6 +1765,21 @@ static inline void epbf_update_load_mode(char *str, netdata_ebpf_load_mode_t ori ebpf_set_load_mode(load, origin); } +/** + * Update Map per core + * + * Define the map type used with some hash tables. + */ +static void ebpf_update_map_per_core() +{ + int i; + int value = appconfig_get_boolean(&collector_config, EBPF_GLOBAL_SECTION, + EBPF_CFG_MAPS_PER_CORE, CONFIG_BOOLEAN_YES); + for (i = 0; ebpf_modules[i].thread_name; i++) { + ebpf_modules[i].maps_per_core = value; + } +} + /** * Read collector values * @@ -1790,6 +1833,8 @@ static void read_collector_values(int *disable_apps, int *disable_cgroups, enabled = appconfig_get_boolean(&collector_config, EBPF_GLOBAL_SECTION, EBPF_CFG_CGROUP, CONFIG_BOOLEAN_NO); *disable_cgroups = (enabled == CONFIG_BOOLEAN_NO)?CONFIG_BOOLEAN_YES:CONFIG_BOOLEAN_NO; + ebpf_update_map_per_core(); + // Read ebpf programs section enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, ebpf_modules[EBPF_MODULE_PROCESS_IDX].config_name, CONFIG_BOOLEAN_YES); @@ -2540,6 +2585,7 @@ int main(int argc, char **argv) heartbeat_init(&hb); int update_apps_every = (int) EBPF_CFG_UPDATE_APPS_EVERY_DEFAULT; int update_apps_list = update_apps_every - 1; + int process_maps_per_core = ebpf_modules[EBPF_MODULE_PROCESS_IDX].maps_per_core; //Plugin will be killed when it receives a signal while (!ebpf_exit_plugin) { (void)heartbeat_next(&hb, step); @@ -2550,7 +2596,7 @@ int main(int argc, char **argv) if (++update_apps_list == update_apps_every) { update_apps_list = 0; cleanup_exited_pids(); - collect_data_for_all_processes(process_pid_fd); + collect_data_for_all_processes(process_pid_fd, process_maps_per_core); pthread_mutex_lock(&lock); ebpf_create_apps_charts(apps_groups_root_target); @@ -2565,3 +2611,4 @@ int main(int argc, char **argv) return 0; } + diff --git a/collectors/ebpf.plugin/ebpf.d.conf b/collectors/ebpf.plugin/ebpf.d.conf index 6a5ec5c39cdacb..483c7752f5eca9 100644 --- a/collectors/ebpf.plugin/ebpf.d.conf 
+++ b/collectors/ebpf.plugin/ebpf.d.conf @@ -15,6 +15,10 @@ # # The `pid table size` defines the maximum number of PIDs stored in the application hash tables. # +# The `btf path` specifies where to find the BTF files. +# +# The `maps per core` defines if hash tables will be per core or not. This option is ignored on kernels older than 4.6. +# [global] ebpf load mode = entry apps = no @@ -22,6 +26,7 @@ update every = 5 pid table size = 32768 btf path = /sys/kernel/btf/ + maps per core = yes # # eBPF Programs diff --git a/collectors/ebpf.plugin/ebpf.d/cachestat.conf b/collectors/ebpf.plugin/ebpf.d/cachestat.conf index 52466be5153932..82f870c983ceb0 100644 --- a/collectors/ebpf.plugin/ebpf.d/cachestat.conf +++ b/collectors/ebpf.plugin/ebpf.d/cachestat.conf @@ -24,6 +24,8 @@ # `parent` : Only stores parent PID. # `all` : Stores all PIDs used by software. This is the most expensive option. # +# The `maps per core` defines if hash tables will be per core or not. This option is ignored on kernels older than 4.6. +# # Uncomment lines to define specific options for thread. [global] # ebpf load mode = entry @@ -34,3 +36,4 @@ ebpf type format = auto ebpf co-re tracing = trampoline collect pid = real parent +# maps per core = yes diff --git a/collectors/ebpf.plugin/ebpf.d/dcstat.conf b/collectors/ebpf.plugin/ebpf.d/dcstat.conf index 8aed8f7835b142..f741b62a8b12b2 100644 --- a/collectors/ebpf.plugin/ebpf.d/dcstat.conf +++ b/collectors/ebpf.plugin/ebpf.d/dcstat.conf @@ -22,6 +22,8 @@ # `parent` : Only stores parent PID. # `all` : Stores all PIDs used by software. This is the most expensive option. # +# The `maps per core` defines if hash tables will be per core or not. This option is ignored on kernels older than 4.6. +# # Uncomment lines to define specific options for thread. 
[global] # ebpf load mode = entry @@ -32,3 +34,4 @@ ebpf type format = auto ebpf co-re tracing = trampoline collect pid = real parent +# maps per core = yes diff --git a/collectors/ebpf.plugin/ebpf.d/fd.conf b/collectors/ebpf.plugin/ebpf.d/fd.conf index 8333520fcf2777..30a5fcfd9e2355 100644 --- a/collectors/ebpf.plugin/ebpf.d/fd.conf +++ b/collectors/ebpf.plugin/ebpf.d/fd.conf @@ -10,6 +10,8 @@ # # The `pid table size` defines the maximum number of PIDs stored inside the hash table. # +# The `maps per core` defines if hash tables will be per core or not. This option is ignored on kernels older than 4.6. +# # Uncomment lines to define specific options for thread. [global] # ebpf load mode = entry @@ -19,3 +21,4 @@ # pid table size = 32768 ebpf type format = auto ebpf co-re tracing = trampoline +# maps per core = yes diff --git a/collectors/ebpf.plugin/ebpf.d/network.conf b/collectors/ebpf.plugin/ebpf.d/network.conf index d939d8e1fdcc75..75644a772dbd3a 100644 --- a/collectors/ebpf.plugin/ebpf.d/network.conf +++ b/collectors/ebpf.plugin/ebpf.d/network.conf @@ -24,6 +24,9 @@ # `tracepoint`: When available, the eBPF collector will use kernel tracepoint to monitor syscall. # `probe` : This is the same as legacy code. # +# The `maps per core` defines if hash tables will be per core or not. This option is ignored on kernels older than 4.6. +# +# Uncomment lines to define specific options for thread. [global] # ebpf load mode = entry # apps = yes @@ -35,6 +38,7 @@ udp connection table size = 4096 ebpf type format = auto ebpf co-re tracing = trampoline + maps per core = no # # Network Connection diff --git a/collectors/ebpf.plugin/ebpf.d/process.conf b/collectors/ebpf.plugin/ebpf.d/process.conf index 1da5f84d3ac878..f5e8804cdcc931 100644 --- a/collectors/ebpf.plugin/ebpf.d/process.conf +++ b/collectors/ebpf.plugin/ebpf.d/process.conf @@ -15,11 +15,14 @@ # `parent` : Only stores parent PID. # `all` : Stores all PIDs used by software. This is the most expensive option. 
# +# The `maps per core` defines if hash tables will be per core or not. This option is ignored on kernels older than 4.6. +# # Uncomment lines to define specific options for thread. -#[global] +[global] # ebpf load mode = entry # apps = yes # cgroups = no # update every = 10 # pid table size = 32768 -# collect pid = real parent + collect pid = real parent +# maps per core = yes diff --git a/collectors/ebpf.plugin/ebpf.d/shm.conf b/collectors/ebpf.plugin/ebpf.d/shm.conf index 23ab96da4bbe25..f8ec1a18f38fa6 100644 --- a/collectors/ebpf.plugin/ebpf.d/shm.conf +++ b/collectors/ebpf.plugin/ebpf.d/shm.conf @@ -18,6 +18,8 @@ # `tracepoint`: When available, the eBPF collector will use kernel tracepoint to monitor syscall. # `probe` : This is the same as legacy code. # +# The `maps per core` defines if hash tables will be per core or not. This option is ignored on kernels older than 4.6. +# # Uncomment lines to define specific options for thread. [global] # ebpf load mode = entry @@ -27,6 +29,7 @@ # pid table size = 32768 ebpf type format = auto ebpf co-re tracing = trampoline +# maps per core = yes # List of monitored syscalls [syscalls] diff --git a/collectors/ebpf.plugin/ebpf.d/swap.conf b/collectors/ebpf.plugin/ebpf.d/swap.conf index 3986ae4f88d3e3..5bad0442481c24 100644 --- a/collectors/ebpf.plugin/ebpf.d/swap.conf +++ b/collectors/ebpf.plugin/ebpf.d/swap.conf @@ -17,6 +17,8 @@ # `trampoline`: This is the default mode used by the eBPF collector, due the small overhead added to host. # `probe` : This is the same as legacy code. # +# The `maps per core` defines if hash tables will be per core or not. This option is ignored on kernels older than 4.6. +# # Uncomment lines to define specific options for thread. 
[global] # ebpf load mode = entry @@ -26,3 +28,4 @@ # pid table size = 32768 ebpf type format = auto ebpf co-re tracing = trampoline +# maps per core = yes diff --git a/collectors/ebpf.plugin/ebpf.d/sync.conf b/collectors/ebpf.plugin/ebpf.d/sync.conf index ebec5d38e48d5b..fefbd4ee67e276 100644 --- a/collectors/ebpf.plugin/ebpf.d/sync.conf +++ b/collectors/ebpf.plugin/ebpf.d/sync.conf @@ -17,7 +17,10 @@ # `trampoline`: This is the default mode used by the eBPF collector, due the small overhead added to host. # `tracepoint`: When available, the eBPF collector will use kernel tracepoint to monitor syscall. # `probe` : This is the same as legacy code. +# +# The `maps per core` defines if hash tables will be per core or not. This option is ignored on kernels older than 4.6. # +# Uncomment lines to define specific options for thread. [global] # ebpf load mode = entry # apps = yes @@ -25,6 +28,7 @@ # update every = 10 ebpf type format = auto ebpf co-re tracing = trampoline +# maps per core = yes # List of monitored syscalls [syscalls] diff --git a/collectors/ebpf.plugin/ebpf.d/vfs.conf b/collectors/ebpf.plugin/ebpf.d/vfs.conf index fa5d5b4e9558d3..b4e5daac0d645a 100644 --- a/collectors/ebpf.plugin/ebpf.d/vfs.conf +++ b/collectors/ebpf.plugin/ebpf.d/vfs.conf @@ -8,6 +8,18 @@ # If you want to disable the integration with `apps.plugin` or `cgroups.plugin` along with the above charts, change # the setting `apps` and `cgroups` to 'no'. # +# The `ebpf type format` option accepts the following values: +# `auto` : The eBPF collector will investigate hardware and select between the next two options. +# `legacy`: The eBPF collector will load the legacy code. Note: This has a bigger overhead. +# `co-re` : The eBPF collector will use the latest tracing method. Note: This is not available on all platforms. +# +# The `ebpf co-re tracing` option accepts the following values: +# `trampoline`: This is the default mode used by the eBPF collector, due to the small overhead added to the host. 
+# `tracepoint`: When available, the eBPF collector will use kernel tracepoint to monitor syscall. +# `probe` : This is the same as legacy code. +# +# The `maps per core` defines if hash tables will be per core or not. This option is ignored on kernels older than 4.6. +# # Uncomment lines to define specific options for thread. [global] # ebpf load mode = entry @@ -17,3 +29,4 @@ # pid table size = 32768 ebpf type format = auto ebpf co-re tracing = trampoline +# maps per core = yes diff --git a/collectors/ebpf.plugin/ebpf.h b/collectors/ebpf.plugin/ebpf.h index 5b48adc624cfff..f9a19233c3a1d0 100644 --- a/collectors/ebpf.plugin/ebpf.h +++ b/collectors/ebpf.plugin/ebpf.h @@ -159,6 +159,7 @@ typedef struct ebpf_tracepoint { #define NETDATA_EBPF_LOAD_METHOD "ebpf_load_methods" #define NETDATA_EBPF_KERNEL_MEMORY "ebpf_kernel_memory" #define NETDATA_EBPF_HASH_TABLES_LOADED "ebpf_hash_tables_count" +#define NETDATA_EBPF_HASH_TABLES_PER_CORE "ebpf_hash_tables_per_core" // Log file #define NETDATA_DEVELOPER_LOG_FILE "developer.log" diff --git a/collectors/ebpf.plugin/ebpf_apps.c b/collectors/ebpf.plugin/ebpf_apps.c index d6db4c67607349..3826f8efcf9907 100644 --- a/collectors/ebpf.plugin/ebpf_apps.c +++ b/collectors/ebpf.plugin/ebpf_apps.c @@ -1414,6 +1414,28 @@ static inline void aggregate_pid_on_target(struct ebpf_target *w, struct ebpf_pi w->root_pid = pid_on_target; } +/** + * Process Accumulator + * + * Sum all values read from kernel and store in the first address. + * + * @param out the vector with read values. + * @param maps_per_core do I need to read all cores? + */ +void ebpf_process_apps_accumulator(ebpf_process_stat_t *out, int maps_per_core) +{ + int i, end = (maps_per_core) ? 
ebpf_nprocs : 1; + ebpf_process_stat_t *total = &out[0]; + for (i = 1; i < end; i++) { + ebpf_process_stat_t *w = &out[i]; + total->exit_call += w->exit_call; + total->task_err += w->task_err; + total->create_thread += w->create_thread; + total->create_process += w->create_process; + total->release_call += w->release_call; + } +} + /** * Collect data for all process * @@ -1421,8 +1443,9 @@ static inline void aggregate_pid_on_target(struct ebpf_target *w, struct ebpf_pi * It also creates the link between targets and PIDs. * * @param tbl_pid_stats_fd The mapped file descriptor for the hash table. + * @param maps_per_core do I have hash maps per core? */ -void collect_data_for_all_processes(int tbl_pid_stats_fd) +void collect_data_for_all_processes(int tbl_pid_stats_fd, int maps_per_core) { if (unlikely(!ebpf_all_pids)) return; @@ -1448,6 +1471,10 @@ void collect_data_for_all_processes(int tbl_pid_stats_fd) uint32_t key; pids = ebpf_root_of_pids; // global list of all processes running // while (bpf_map_get_next_key(tbl_pid_stats_fd, &key, &next_key) == 0) { + size_t length = sizeof(ebpf_process_stat_t); + if (maps_per_core) + length *= ebpf_nprocs; + while (pids) { key = pids->pid; ebpf_process_stat_t *w = global_process_stats[key]; @@ -1456,7 +1483,7 @@ void collect_data_for_all_processes(int tbl_pid_stats_fd) global_process_stats[key] = w; } - if (bpf_map_lookup_elem(tbl_pid_stats_fd, &key, w)) { + if (bpf_map_lookup_elem(tbl_pid_stats_fd, &key, process_stat_vector)) { // Clean Process structures ebpf_process_stat_release(w); global_process_stats[key] = NULL; @@ -1467,6 +1494,12 @@ void collect_data_for_all_processes(int tbl_pid_stats_fd) continue; } + ebpf_process_apps_accumulator(process_stat_vector, maps_per_core); + + memcpy(w, process_stat_vector, sizeof(ebpf_process_stat_t)); + + memset(process_stat_vector, 0, length); + pids = pids->next; } diff --git a/collectors/ebpf.plugin/ebpf_apps.h b/collectors/ebpf.plugin/ebpf_apps.h index 
d33442af5c4801..ad2e338d415f77 100644 --- a/collectors/ebpf.plugin/ebpf_apps.h +++ b/collectors/ebpf.plugin/ebpf_apps.h @@ -213,7 +213,8 @@ size_t read_processes_statistic_using_pid_on_target(ebpf_process_stat_t **ep, size_t read_bandwidth_statistic_using_pid_on_target(ebpf_bandwidth_t **ep, int fd, struct ebpf_pid_on_target *pids); -void collect_data_for_all_processes(int tbl_pid_stats_fd); +void collect_data_for_all_processes(int tbl_pid_stats_fd, int maps_per_core); +void ebpf_process_apps_accumulator(ebpf_process_stat_t *out, int maps_per_core); extern ebpf_process_stat_t **global_process_stats; extern netdata_publish_cachestat_t **cachestat_pid; @@ -235,6 +236,7 @@ extern void ebpf_aral_init(void); extern ebpf_process_stat_t *ebpf_process_stat_get(void); extern void ebpf_process_stat_release(ebpf_process_stat_t *stat); +extern ebpf_process_stat_t *process_stat_vector; extern ARAL *ebpf_aral_socket_pid; void ebpf_socket_aral_init(); diff --git a/collectors/ebpf.plugin/ebpf_cachestat.c b/collectors/ebpf.plugin/ebpf_cachestat.c index b2b006dd3df5c3..5bbbe1f43bb56e 100644 --- a/collectors/ebpf.plugin/ebpf_cachestat.c +++ b/collectors/ebpf.plugin/ebpf_cachestat.c @@ -14,19 +14,34 @@ static netdata_idx_t cachestat_hash_values[NETDATA_CACHESTAT_END]; static netdata_idx_t *cachestat_values = NULL; ebpf_local_maps_t cachestat_maps[] = {{.name = "cstat_global", .internal_input = NETDATA_CACHESTAT_END, - .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, - {.name = "cstat_pid", .internal_input = ND_EBPF_DEFAULT_PID_SIZE, - .user_input = 0, - .type = NETDATA_EBPF_MAP_RESIZABLE | NETDATA_EBPF_MAP_PID, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, - {.name = "cstat_ctrl", .internal_input = NETDATA_CONTROLLER_END, - .user_input = 0, - .type = NETDATA_EBPF_MAP_CONTROLLER, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, - {.name = NULL, .internal_input = 0, .user_input = 0, - .type = NETDATA_EBPF_MAP_CONTROLLER, - .map_fd = 
ND_EBPF_MAP_FD_NOT_INITIALIZED}}; + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }, + {.name = "cstat_pid", .internal_input = ND_EBPF_DEFAULT_PID_SIZE, + .user_input = 0, + .type = NETDATA_EBPF_MAP_RESIZABLE | NETDATA_EBPF_MAP_PID, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_HASH +#endif + }, + {.name = "cstat_ctrl", .internal_input = NETDATA_CONTROLLER_END, + .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }, + {.name = NULL, .internal_input = 0, .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION +#endif + }}; struct config cachestat_config = { .first_section = NULL, .last_section = NULL, @@ -233,10 +248,14 @@ static int ebpf_cachestat_attach_probe(struct cachestat_bpf *obj) * @param obj is the main structure for bpf objects. 
* @param em structure with configuration */ -static void ebpf_cachestat_adjust_map_size(struct cachestat_bpf *obj, ebpf_module_t *em) +static void ebpf_cachestat_adjust_map(struct cachestat_bpf *obj, ebpf_module_t *em) { ebpf_update_map_size(obj->maps.cstat_pid, &cachestat_maps[NETDATA_CACHESTAT_PID_STATS], em, bpf_map__name(obj->maps.cstat_pid)); + + ebpf_update_map_type(obj->maps.cstat_global, &cachestat_maps[NETDATA_CACHESTAT_GLOBAL_STATS]); + ebpf_update_map_type(obj->maps.cstat_pid, &cachestat_maps[NETDATA_CACHESTAT_PID_STATS]); + ebpf_update_map_type(obj->maps.cstat_ctrl, &cachestat_maps[NETDATA_CACHESTAT_CTRL]); } /** @@ -291,7 +310,7 @@ static inline int ebpf_cachestat_load_and_attach(struct cachestat_bpf *obj, ebpf ebpf_cachestat_disable_specific_probe(obj); } - ebpf_cachestat_adjust_map_size(obj, em); + ebpf_cachestat_adjust_map(obj, em); if (!em->apps_charts && !em->cgroup_charts) ebpf_cachestat_disable_release_task(obj); @@ -445,10 +464,11 @@ static void calculate_stats(netdata_publish_cachestat_t *publish) { * Sum all values read from kernel and store in the first address. * * @param out the vector with read values. + * @param maps_per_core do I need to read all cores? */ -static void cachestat_apps_accumulator(netdata_cachestat_pid_t *out) +static void cachestat_apps_accumulator(netdata_cachestat_pid_t *out, int maps_per_core) { - int i, end = (running_on_kernel >= NETDATA_KERNEL_V4_15) ? ebpf_nprocs : 1; + int i, end = (maps_per_core) ? ebpf_nprocs : 1; netdata_cachestat_pid_t *total = &out[0]; for (i = 1; i < end; i++) { netdata_cachestat_pid_t *w = &out[i]; @@ -504,14 +524,19 @@ static void cachestat_fill_pid(uint32_t current_pid, netdata_cachestat_pid_t *pu * Read APPS table * * Read the apps table and store data inside the structure. + * + * @param maps_per_core do I need to read all cores? 
*/ -static void read_apps_table() +static void ebpf_read_cachestat_apps_table(int maps_per_core) { netdata_cachestat_pid_t *cv = cachestat_vector; uint32_t key; struct ebpf_pid_stat *pids = ebpf_root_of_pids; int fd = cachestat_maps[NETDATA_CACHESTAT_PID_STATS].map_fd; - size_t length = sizeof(netdata_cachestat_pid_t)*ebpf_nprocs; + size_t length = sizeof(netdata_cachestat_pid_t); + if (maps_per_core) + length *= ebpf_nprocs; + while (pids) { key = pids->pid; @@ -520,7 +545,7 @@ static void read_apps_table() continue; } - cachestat_apps_accumulator(cv); + cachestat_apps_accumulator(cv, maps_per_core); cachestat_fill_pid(key, cv); @@ -535,12 +560,16 @@ static void read_apps_table() * Update cgroup * * Update cgroup data based in + * + * @param maps_per_core do I need to read all cores? */ -static void ebpf_update_cachestat_cgroup() +static void ebpf_update_cachestat_cgroup(int maps_per_core) { netdata_cachestat_pid_t *cv = cachestat_vector; int fd = cachestat_maps[NETDATA_CACHESTAT_PID_STATS].map_fd; - size_t length = sizeof(netdata_cachestat_pid_t) * ebpf_nprocs; + size_t length = sizeof(netdata_cachestat_pid_t); + if (maps_per_core) + length *= ebpf_nprocs; ebpf_cgroup_target_t *ect; pthread_mutex_lock(&mutex_cgroup_shm); @@ -559,7 +588,7 @@ static void ebpf_update_cachestat_cgroup() continue; } - cachestat_apps_accumulator(cv); + cachestat_apps_accumulator(cv, maps_per_core); memcpy(out, cv, sizeof(netdata_cachestat_pid_t)); } @@ -627,8 +656,10 @@ void ebpf_cachestat_create_apps_charts(struct ebpf_module *em, void *ptr) * Read global counter * * Read the table with number of calls for all functions + * + * @param maps_per_core do I need to read all cores? 
*/ -static void ebpf_cachestat_read_global_table() +static void ebpf_cachestat_read_global_table(int maps_per_core) { uint32_t idx; netdata_idx_t *val = cachestat_hash_values; @@ -638,7 +669,7 @@ static void ebpf_cachestat_read_global_table() for (idx = NETDATA_KEY_CALLS_ADD_TO_PAGE_CACHE_LRU; idx < NETDATA_CACHESTAT_END; idx++) { if (!bpf_map_lookup_elem(fd, &idx, stored)) { int i; - int end = ebpf_nprocs; + int end = (maps_per_core) ? ebpf_nprocs: 1; netdata_idx_t total = 0; for (i = 0; i < end; i++) total += stored[i]; @@ -1053,6 +1084,7 @@ static void cachestat_collector(ebpf_module_t *em) memset(&publish, 0, sizeof(publish)); int cgroups = em->cgroup_charts; int update_every = em->update_every; + int maps_per_core = em->maps_per_core; heartbeat_t hb; heartbeat_init(&hb); int counter = update_every - 1; @@ -1065,13 +1097,13 @@ static void cachestat_collector(ebpf_module_t *em) counter = 0; netdata_apps_integration_flags_t apps = em->apps_charts; - ebpf_cachestat_read_global_table(); + ebpf_cachestat_read_global_table(maps_per_core); pthread_mutex_lock(&collect_data_mutex); if (apps) - read_apps_table(); + ebpf_read_cachestat_apps_table(maps_per_core); if (cgroups) - ebpf_update_cachestat_cgroup(); + ebpf_update_cachestat_cgroup(maps_per_core); pthread_mutex_lock(&lock); @@ -1216,6 +1248,10 @@ static int ebpf_cachestat_set_internal_value() */ static int ebpf_cachestat_load_bpf(ebpf_module_t *em) { +#ifdef LIBBPF_MAJOR_VERSION + ebpf_define_map_type(cachestat_maps, em->maps_per_core, running_on_kernel); +#endif + int ret = 0; ebpf_adjust_apps_cgroup(em, em->targets[NETDATA_KEY_CALLS_ADD_TO_PAGE_CACHE_LRU].mode); if (em->load & EBPF_LOAD_LEGACY) { diff --git a/collectors/ebpf.plugin/ebpf_dcstat.c b/collectors/ebpf.plugin/ebpf_dcstat.c index 5f14006015fb33..5a07e4619bfe22 100644 --- a/collectors/ebpf.plugin/ebpf_dcstat.c +++ b/collectors/ebpf.plugin/ebpf_dcstat.c @@ -19,19 +19,35 @@ struct config dcstat_config = { .first_section = NULL, .rwlock = 
AVL_LOCK_INITIALIZER } }; ebpf_local_maps_t dcstat_maps[] = {{.name = "dcstat_global", .internal_input = NETDATA_DIRECTORY_CACHE_END, - .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, - {.name = "dcstat_pid", .internal_input = ND_EBPF_DEFAULT_PID_SIZE, - .user_input = 0, - .type = NETDATA_EBPF_MAP_RESIZABLE | NETDATA_EBPF_MAP_PID, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, - {.name = "dcstat_ctrl", .internal_input = NETDATA_CONTROLLER_END, - .user_input = 0, - .type = NETDATA_EBPF_MAP_CONTROLLER, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, - {.name = NULL, .internal_input = 0, .user_input = 0, - .type = NETDATA_EBPF_MAP_CONTROLLER, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}}; + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }, + {.name = "dcstat_pid", .internal_input = ND_EBPF_DEFAULT_PID_SIZE, + .user_input = 0, + .type = NETDATA_EBPF_MAP_RESIZABLE | NETDATA_EBPF_MAP_PID, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_HASH +#endif + }, + {.name = "dcstat_ctrl", .internal_input = NETDATA_CONTROLLER_END, + .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }, + {.name = NULL, .internal_input = 0, .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }}; static ebpf_specify_name_t dc_optional_name[] = { {.program_name = "netdata_lookup_fast", .function_to_attach = "lookup_fast", @@ -138,10 +154,14 @@ static int ebpf_dc_attach_probes(struct dc_bpf *obj) * @param obj is the main structure for bpf objects. 
* @param em structure with configuration */ -static void ebpf_dc_adjust_map_size(struct dc_bpf *obj, ebpf_module_t *em) +static void ebpf_dc_adjust_map(struct dc_bpf *obj, ebpf_module_t *em) { ebpf_update_map_size(obj->maps.dcstat_pid, &dcstat_maps[NETDATA_DCSTAT_PID_STATS], em, bpf_map__name(obj->maps.dcstat_pid)); + + ebpf_update_map_type(obj->maps.dcstat_global, &dcstat_maps[NETDATA_DCSTAT_GLOBAL_STATS]); + ebpf_update_map_type(obj->maps.dcstat_pid, &dcstat_maps[NETDATA_DCSTAT_PID_STATS]); + ebpf_update_map_type(obj->maps.dcstat_ctrl, &dcstat_maps[NETDATA_DCSTAT_CTRL]); } /** @@ -215,7 +235,7 @@ static inline int ebpf_dc_load_and_attach(struct dc_bpf *obj, ebpf_module_t *em) ebpf_dc_disable_trampoline(obj); } - ebpf_dc_adjust_map_size(obj, em); + ebpf_dc_adjust_map(obj, em); if (!em->apps_charts && !em->cgroup_charts) ebpf_dc_disable_release_task(obj); @@ -382,10 +402,11 @@ void ebpf_dcstat_create_apps_charts(struct ebpf_module *em, void *ptr) * Sum all values read from kernel and store in the first address. * * @param out the vector with read values. + * @param maps_per_core do I need to read all cores? */ -static void dcstat_apps_accumulator(netdata_dcstat_pid_t *out) +static void dcstat_apps_accumulator(netdata_dcstat_pid_t *out, int maps_per_core) { - int i, end = (running_on_kernel >= NETDATA_KERNEL_V4_15) ? ebpf_nprocs : 1; + int i, end = (maps_per_core) ? ebpf_nprocs : 1; netdata_dcstat_pid_t *total = &out[0]; for (i = 1; i < end; i++) { netdata_dcstat_pid_t *w = &out[i]; @@ -428,17 +449,22 @@ static void dcstat_fill_pid(uint32_t current_pid, netdata_dcstat_pid_t *publish) } /** - * Read APPS table + * Read Directory Cache APPS table * * Read the apps table and store data inside the structure. + * + * @param maps_per_core do I need to read all cores? 
*/ -static void read_apps_table() +static void read_dc_apps_table(int maps_per_core) { netdata_dcstat_pid_t *cv = dcstat_vector; uint32_t key; struct ebpf_pid_stat *pids = ebpf_root_of_pids; int fd = dcstat_maps[NETDATA_DCSTAT_PID_STATS].map_fd; - size_t length = sizeof(netdata_dcstat_pid_t)*ebpf_nprocs; + size_t length = sizeof(netdata_dcstat_pid_t); + if (maps_per_core) + length *= ebpf_nprocs; + while (pids) { key = pids->pid; @@ -447,7 +473,7 @@ static void read_apps_table() continue; } - dcstat_apps_accumulator(cv); + dcstat_apps_accumulator(cv, maps_per_core); dcstat_fill_pid(key, cv); @@ -461,9 +487,11 @@ static void read_apps_table() /** * Update cgroup * - * Update cgroup data based in + * Update cgroup data based in collected PID. + * + * @param maps_per_core do I need to read all cores? */ -static void ebpf_update_dc_cgroup() +static void ebpf_update_dc_cgroup(int maps_per_core) { netdata_dcstat_pid_t *cv = dcstat_vector; int fd = dcstat_maps[NETDATA_DCSTAT_PID_STATS].map_fd; @@ -486,7 +514,7 @@ static void ebpf_update_dc_cgroup() continue; } - dcstat_apps_accumulator(cv); + dcstat_apps_accumulator(cv, maps_per_core); memcpy(out, cv, sizeof(netdata_dcstat_pid_t)); } @@ -499,8 +527,10 @@ static void ebpf_update_dc_cgroup() * Read global table * * Read the table with number of calls for all functions + * + * @param maps_per_core do I need to read all cores? */ -static void ebpf_dc_read_global_table() +static void ebpf_dc_read_global_table(int maps_per_core) { uint32_t idx; netdata_idx_t *val = dcstat_hash_values; @@ -510,7 +540,7 @@ static void ebpf_dc_read_global_table() for (idx = NETDATA_KEY_DC_REFERENCE; idx < NETDATA_DIRECTORY_CACHE_END; idx++) { if (!bpf_map_lookup_elem(fd, &idx, stored)) { int i; - int end = ebpf_nprocs; + int end = (maps_per_core) ? 
ebpf_nprocs: 1; netdata_idx_t total = 0; for (i = 0; i < end; i++) total += stored[i]; @@ -974,6 +1004,7 @@ static void dcstat_collector(ebpf_module_t *em) heartbeat_t hb; heartbeat_init(&hb); int counter = update_every - 1; + int maps_per_core = em->maps_per_core; while (!ebpf_exit_plugin) { (void)heartbeat_next(&hb, USEC_PER_SEC); @@ -982,13 +1013,13 @@ static void dcstat_collector(ebpf_module_t *em) counter = 0; netdata_apps_integration_flags_t apps = em->apps_charts; - ebpf_dc_read_global_table(); + ebpf_dc_read_global_table(maps_per_core); pthread_mutex_lock(&collect_data_mutex); if (apps) - read_apps_table(); + read_dc_apps_table(maps_per_core); if (cgroups) - ebpf_update_dc_cgroup(); + ebpf_update_dc_cgroup(maps_per_core); pthread_mutex_lock(&lock); @@ -1084,6 +1115,10 @@ static void ebpf_dcstat_allocate_global_vectors(int apps) */ static int ebpf_dcstat_load_bpf(ebpf_module_t *em) { +#ifdef LIBBPF_MAJOR_VERSION + ebpf_define_map_type(dcstat_maps, em->maps_per_core, running_on_kernel); +#endif + int ret = 0; ebpf_adjust_apps_cgroup(em, em->targets[NETDATA_DC_TARGET_LOOKUP_FAST].mode); if (em->load & EBPF_LOAD_LEGACY) { diff --git a/collectors/ebpf.plugin/ebpf_disk.c b/collectors/ebpf.plugin/ebpf_disk.c index e1a579441cfeb2..71c9727779983f 100644 --- a/collectors/ebpf.plugin/ebpf_disk.c +++ b/collectors/ebpf.plugin/ebpf_disk.c @@ -14,10 +14,25 @@ struct config disk_config = { .first_section = NULL, static ebpf_local_maps_t disk_maps[] = {{.name = "tbl_disk_iocall", .internal_input = NETDATA_DISK_HISTOGRAM_LENGTH, .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_HASH +#endif + }, + {.name = "tmp_disk_tp_stat", .internal_input = 8192, .user_input = 8192, + .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_HASH +#endif + }, 
{.name = NULL, .internal_input = 0, .user_input = 0, .type = NETDATA_EBPF_MAP_CONTROLLER, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}}; + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }}; static avl_tree_lock disk_tree; netdata_ebpf_disks_t *disk_list = NULL; @@ -503,11 +518,12 @@ static void ebpf_fill_plot_disks(netdata_ebpf_disks_t *ptr) /** * Read hard disk table * - * @param table file descriptor for table - * * Read the table with number of calls for all functions + * + * @param table file descriptor for table + * @param maps_per_core do I need to read all cores? */ -static void read_hard_disk_tables(int table) +static void read_hard_disk_tables(int table, int maps_per_core) { netdata_idx_t *values = disk_hash_values; block_key_t key = {}; @@ -548,7 +564,7 @@ static void read_hard_disk_tables(int table) uint64_t total = 0; int i; - int end = (running_on_kernel < NETDATA_KERNEL_V4_15) ? 1 : ebpf_nprocs; + int end = (maps_per_core) ? 
1 : ebpf_nprocs; for (i = 0; i < end; i++) { total += values[i]; } @@ -690,6 +706,7 @@ static void disk_collector(ebpf_module_t *em) heartbeat_t hb; heartbeat_init(&hb); int counter = update_every - 1; + int maps_per_core = em->maps_per_core; while (!ebpf_exit_plugin) { (void)heartbeat_next(&hb, USEC_PER_SEC); @@ -697,7 +714,7 @@ static void disk_collector(ebpf_module_t *em) continue; counter = 0; - read_hard_disk_tables(disk_maps[NETDATA_DISK_READ].map_fd); + read_hard_disk_tables(disk_maps[NETDATA_DISK_READ].map_fd, maps_per_core); pthread_mutex_lock(&lock); ebpf_remove_pointer_from_plot_disk(em); ebpf_latency_send_hd_data(update_every); @@ -774,6 +791,9 @@ void *ebpf_disk_thread(void *ptr) goto enddisk; } +#ifdef LIBBPF_MAJOR_VERSION + ebpf_define_map_type(disk_maps, em->maps_per_core, running_on_kernel); +#endif em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); if (!em->probe_links) { goto enddisk; diff --git a/collectors/ebpf.plugin/ebpf_disk.h b/collectors/ebpf.plugin/ebpf_disk.h index c606d6594f4745..69c705875875be 100644 --- a/collectors/ebpf.plugin/ebpf_disk.h +++ b/collectors/ebpf.plugin/ebpf_disk.h @@ -55,7 +55,8 @@ typedef struct netdata_ebpf_disks { } netdata_ebpf_disks_t; enum ebpf_disk_tables { - NETDATA_DISK_READ + NETDATA_DISK_READ, + NETDATA_DISK_TMP }; typedef struct block_key { diff --git a/collectors/ebpf.plugin/ebpf_fd.c b/collectors/ebpf.plugin/ebpf_fd.c index 96da91b0a3bcfa..6d3868952dd2e1 100644 --- a/collectors/ebpf.plugin/ebpf_fd.c +++ b/collectors/ebpf.plugin/ebpf_fd.c @@ -15,17 +15,33 @@ static netdata_publish_syscall_t fd_publish_aggregated[NETDATA_FD_SYSCALL_END]; static ebpf_local_maps_t fd_maps[] = {{.name = "tbl_fd_pid", .internal_input = ND_EBPF_DEFAULT_PID_SIZE, .user_input = 0, .type = NETDATA_EBPF_MAP_RESIZABLE | NETDATA_EBPF_MAP_PID, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = 
BPF_MAP_TYPE_PERCPU_HASH +#endif + }, {.name = "tbl_fd_global", .internal_input = NETDATA_KEY_END_VECTOR, .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }, {.name = "fd_ctrl", .internal_input = NETDATA_CONTROLLER_END, .user_input = 0, .type = NETDATA_EBPF_MAP_CONTROLLER, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }, {.name = NULL, .internal_input = 0, .user_input = 0, .type = NETDATA_EBPF_MAP_CONTROLLER, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}}; + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }}; struct config fd_config = { .first_section = NULL, .last_section = NULL, .mutex = NETDATA_MUTEX_INITIALIZER, @@ -271,10 +287,14 @@ static void ebpf_fd_set_hash_tables(struct fd_bpf *obj) * @param obj is the main structure for bpf objects. 
* @param em structure with configuration */ -static void ebpf_fd_adjust_map_size(struct fd_bpf *obj, ebpf_module_t *em) +static void ebpf_fd_adjust_map(struct fd_bpf *obj, ebpf_module_t *em) { ebpf_update_map_size(obj->maps.tbl_fd_pid, &fd_maps[NETDATA_FD_PID_STATS], em, bpf_map__name(obj->maps.tbl_fd_pid)); + + ebpf_update_map_type(obj->maps.tbl_fd_global, &fd_maps[NETDATA_FD_GLOBAL_STATS]); + ebpf_update_map_type(obj->maps.tbl_fd_pid, &fd_maps[NETDATA_FD_PID_STATS]); + ebpf_update_map_type(obj->maps.fd_ctrl, &fd_maps[NETDATA_FD_CONTROLLER]); } /** @@ -322,7 +342,7 @@ static inline int ebpf_fd_load_and_attach(struct fd_bpf *obj, ebpf_module_t *em) ebpf_disable_specific_probes(obj); } - ebpf_fd_adjust_map_size(obj, em); + ebpf_fd_adjust_map(obj, em); if (!em->apps_charts && !em->cgroup_charts) ebpf_fd_disable_release_task(obj); @@ -415,8 +435,10 @@ static void ebpf_fd_send_data(ebpf_module_t *em) * Read global counter * * Read the table with number of calls for all functions + * + * @param maps_per_core do I need to read all cores? */ -static void ebpf_fd_read_global_table() +static void ebpf_fd_read_global_table(int maps_per_core) { uint32_t idx; netdata_idx_t *val = fd_hash_values; @@ -426,7 +448,7 @@ static void ebpf_fd_read_global_table() for (idx = NETDATA_KEY_CALLS_DO_SYS_OPEN; idx < NETDATA_FD_COUNTER; idx++) { if (!bpf_map_lookup_elem(fd, &idx, stored)) { int i; - int end = ebpf_nprocs; + int end = (maps_per_core) ? ebpf_nprocs: 1; netdata_idx_t total = 0; for (i = 0; i < end; i++) total += stored[i]; @@ -442,10 +464,11 @@ static void ebpf_fd_read_global_table() * Sum all values read from kernel and store in the first address. * * @param out the vector with read values. + * @param maps_per_core do I need to read all cores? */ -static void fd_apps_accumulator(netdata_fd_stat_t *out) +static void fd_apps_accumulator(netdata_fd_stat_t *out, int maps_per_core) { - int i, end = (running_on_kernel >= NETDATA_KERNEL_V4_15) ? 
ebpf_nprocs : 1; + int i, end = (maps_per_core) ? ebpf_nprocs : 1; netdata_fd_stat_t *total = &out[0]; for (i = 1; i < end; i++) { netdata_fd_stat_t *w = &out[i]; @@ -479,14 +502,19 @@ static void fd_fill_pid(uint32_t current_pid, netdata_fd_stat_t *publish) * Read APPS table * * Read the apps table and store data inside the structure. + * + * @param maps_per_core do I need to read all cores? */ -static void read_apps_table() +static void read_fd_apps_table(int maps_per_core) { netdata_fd_stat_t *fv = fd_vector; uint32_t key; struct ebpf_pid_stat *pids = ebpf_root_of_pids; int fd = fd_maps[NETDATA_FD_PID_STATS].map_fd; - size_t length = sizeof(netdata_fd_stat_t) * ebpf_nprocs; + size_t length = sizeof(netdata_fd_stat_t); + if (maps_per_core) + length *= ebpf_nprocs; + while (pids) { key = pids->pid; @@ -495,7 +523,7 @@ static void read_apps_table() continue; } - fd_apps_accumulator(fv); + fd_apps_accumulator(fv, maps_per_core); fd_fill_pid(key, fv); @@ -509,9 +537,11 @@ static void read_apps_table() /** * Update cgroup * - * Update cgroup data based in + * Update cgroup data collected per PID. + * + * @param maps_per_core do I need to read all cores? 
*/ -static void ebpf_update_fd_cgroup() +static void ebpf_update_fd_cgroup(int maps_per_core) { ebpf_cgroup_target_t *ect ; netdata_fd_stat_t *fv = fd_vector; @@ -531,7 +561,7 @@ static void ebpf_update_fd_cgroup() } else { memset(fv, 0, length); if (!bpf_map_lookup_elem(fd, &pid, fv)) { - fd_apps_accumulator(fv); + fd_apps_accumulator(fv, maps_per_core); memcpy(out, fv, sizeof(netdata_fd_stat_t)); } @@ -915,6 +945,7 @@ static void fd_collector(ebpf_module_t *em) heartbeat_init(&hb); int update_every = em->update_every; int counter = update_every - 1; + int maps_per_core = em->maps_per_core; while (!ebpf_exit_plugin) { (void)heartbeat_next(&hb, USEC_PER_SEC); @@ -923,21 +954,21 @@ static void fd_collector(ebpf_module_t *em) counter = 0; netdata_apps_integration_flags_t apps = em->apps_charts; - ebpf_fd_read_global_table(); + ebpf_fd_read_global_table(maps_per_core); pthread_mutex_lock(&collect_data_mutex); if (apps) - read_apps_table(); + read_fd_apps_table(maps_per_core); + + if (cgroups) + ebpf_update_fd_cgroup(maps_per_core); + + pthread_mutex_lock(&lock); #ifdef NETDATA_DEV_MODE if (ebpf_aral_fd_pid) ebpf_send_data_aral_chart(ebpf_aral_fd_pid, em); #endif - if (cgroups) - ebpf_update_fd_cgroup(); - - pthread_mutex_lock(&lock); - ebpf_fd_send_data(em); if (apps & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) @@ -1082,6 +1113,10 @@ static void ebpf_fd_allocate_global_vectors(int apps) */ static int ebpf_fd_load_bpf(ebpf_module_t *em) { +#ifdef LIBBPF_MAJOR_VERSION + ebpf_define_map_type(fd_maps, em->maps_per_core, running_on_kernel); +#endif + int ret = 0; ebpf_adjust_apps_cgroup(em, em->targets[NETDATA_FD_SYSCALL_OPEN].mode); if (em->load & EBPF_LOAD_LEGACY) { diff --git a/collectors/ebpf.plugin/ebpf_filesystem.c b/collectors/ebpf.plugin/ebpf_filesystem.c index f8b28195c953f7..63f592eb93c7c8 100644 --- a/collectors/ebpf.plugin/ebpf_filesystem.c +++ b/collectors/ebpf.plugin/ebpf_filesystem.c @@ -8,27 +8,122 @@ struct config fs_config = { .first_section = NULL, .index = { 
.avl_tree = { .root = NULL, .compar = appconfig_section_compare }, .rwlock = AVL_LOCK_INITIALIZER } }; -static ebpf_local_maps_t fs_maps[] = {{.name = "tbl_ext4", .internal_input = NETDATA_KEY_CALLS_SYNC, - .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, - {.name = "tbl_xfs", .internal_input = NETDATA_KEY_CALLS_SYNC, - .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, - {.name = "tbl_nfs", .internal_input = NETDATA_KEY_CALLS_SYNC, - .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, - {.name = "tbl_zfs", .internal_input = NETDATA_KEY_CALLS_SYNC, - .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, - {.name = "tbl_btrfs", .internal_input = NETDATA_KEY_CALLS_SYNC, - .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, - {.name = "tbl_ext_addr", .internal_input = 1, - .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, - {.name = NULL, .internal_input = 0, .user_input = 0, - .type = NETDATA_EBPF_MAP_CONTROLLER, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}}; +ebpf_local_maps_t ext4_maps[] = {{.name = "tbl_ext4", .internal_input = NETDATA_KEY_CALLS_SYNC, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }, + {.name = "tmp_ext4", .internal_input = 4192, .user_input = 4192, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_HASH +#endif + }, + {.name = NULL, .internal_input = 0, .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_HASH +#endif + }}; + +ebpf_local_maps_t xfs_maps[] = 
{{.name = "tbl_xfs", .internal_input = NETDATA_KEY_CALLS_SYNC, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }, + {.name = "tmp_xfs", .internal_input = 4192, .user_input = 4192, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_HASH +#endif + }, + {.name = NULL, .internal_input = 0, .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_HASH +#endif + }}; + +ebpf_local_maps_t nfs_maps[] = {{.name = "tbl_nfs", .internal_input = NETDATA_KEY_CALLS_SYNC, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }, + {.name = "tmp_nfs", .internal_input = 4192, .user_input = 4192, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_HASH +#endif + }, + {.name = NULL, .internal_input = 0, .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_HASH +#endif + }}; + +ebpf_local_maps_t zfs_maps[] = {{.name = "tbl_zfs", .internal_input = NETDATA_KEY_CALLS_SYNC, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }, + {.name = "tmp_zfs", .internal_input = 4192, .user_input = 4192, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_HASH +#endif + }, + {.name = NULL, .internal_input = 0, .user_input = 0, + .type = 
NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_HASH +#endif + }}; + +ebpf_local_maps_t btrfs_maps[] = {{.name = "tbl_btrfs", .internal_input = NETDATA_KEY_CALLS_SYNC, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }, + {.name = "tbl_ext_addr", .internal_input = 1, .user_input = 1, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_HASH +#endif + }, + {.name = "tmp_btrfs", .internal_input = 4192, .user_input = 4192, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_HASH +#endif + }, + {.name = NULL, .internal_input = 0, .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_HASH +#endif + }}; static netdata_syscall_stat_t filesystem_aggregated_data[NETDATA_EBPF_HIST_MAX_BINS]; static netdata_publish_syscall_t filesystem_publish_aggregated[NETDATA_EBPF_HIST_MAX_BINS]; @@ -176,26 +271,32 @@ int ebpf_filesystem_initialize_ebpf_data(ebpf_module_t *em) if (!efp->probe_links && efp->flags & NETDATA_FILESYSTEM_LOAD_EBPF_PROGRAM) { em->thread_name = efp->filesystem; em->kernels = efp->kernels; + em->maps = efp->fs_maps; +#ifdef LIBBPF_MAJOR_VERSION + ebpf_define_map_type(em->maps, em->maps_per_core, running_on_kernel); +#endif efp->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &efp->objects); if (!efp->probe_links) { em->thread_name = saved_name; em->kernels = kernels; + em->maps = NULL; return -1; } efp->flags |= NETDATA_FILESYSTEM_FLAG_HAS_PARTITION; pthread_mutex_lock(&lock); - ebpf_update_kernel_memory(&plugin_statistics, 
&fs_maps[i], EBPF_ACTION_STAT_ADD); + ebpf_update_kernel_memory(&plugin_statistics, efp->fs_maps, EBPF_ACTION_STAT_ADD); pthread_mutex_unlock(&lock); // Nedeed for filesystems like btrfs if ((efp->flags & NETDATA_FILESYSTEM_FILL_ADDRESS_TABLE) && (efp->addresses.function)) { - ebpf_load_addresses(&efp->addresses, fs_maps[i + 1].map_fd); + ebpf_load_addresses(&efp->addresses, efp->fs_maps[NETDATA_ADDR_FS_TABLE].map_fd); } } efp->flags &= ~NETDATA_FILESYSTEM_LOAD_EBPF_PROGRAM; } em->thread_name = saved_name; em->kernels = kernels; + em->maps = NULL; if (!dimensions) { dimensions = ebpf_fill_histogram_dimension(NETDATA_EBPF_HIST_MAX_BINS); @@ -394,11 +495,13 @@ static inline netdata_ebpf_histogram_t *select_hist(ebpf_filesystem_partitions_t /** * Read hard disk table * - * @param table index for the hash table + * @param efp structure with filesystem monitored + * @param fd file descriptor to get data. + * @param maps_per_core do I need to read all cores? * * Read the table with number of calls for all functions */ -static void read_filesystem_table(ebpf_filesystem_partitions_t *efp, int fd) +static void read_filesystem_table(ebpf_filesystem_partitions_t *efp, int fd, int maps_per_core) { netdata_idx_t *values = filesystem_hash_values; uint32_t key; @@ -416,7 +519,7 @@ static void read_filesystem_table(ebpf_filesystem_partitions_t *efp, int fd) uint64_t total = 0; int i; - int end = ebpf_nprocs; + int end = (maps_per_core) ? ebpf_nprocs : 1; for (i = 0; i < end; i++) { total += values[i]; } @@ -430,17 +533,17 @@ static void read_filesystem_table(ebpf_filesystem_partitions_t *efp, int fd) /** * Read hard disk table * - * @param table index for the hash table - * * Read the table with number of calls for all functions + * + * @param maps_per_core do I need to read all cores? 
*/ -static void read_filesystem_tables() +static void read_filesystem_tables(int maps_per_core) { int i; for (i = 0; localfs[i].filesystem; i++) { ebpf_filesystem_partitions_t *efp = &localfs[i]; if (efp->flags & NETDATA_FILESYSTEM_FLAG_HAS_PARTITION) { - read_filesystem_table(efp, fs_maps[i].map_fd); + read_filesystem_table(efp, efp->fs_maps[NETDATA_MAIN_FS_TABLE].map_fd, maps_per_core); } } } @@ -464,7 +567,7 @@ void ebpf_filesystem_read_hash(ebpf_module_t *em) if (em->optional) return; - read_filesystem_tables(); + read_filesystem_tables(em->maps_per_core); } /** @@ -545,6 +648,21 @@ static void ebpf_update_filesystem() } } +/** + * Set maps + * + * When thread is initialized the variable fs_maps is set as null, + * this function fills the variable before to use. + */ +static void ebpf_set_maps() +{ + localfs[NETDATA_FS_LOCALFS_EXT4].fs_maps = ext4_maps; + localfs[NETDATA_FS_LOCALFS_XFS].fs_maps = xfs_maps; + localfs[NETDATA_FS_LOCALFS_NFS].fs_maps = nfs_maps; + localfs[NETDATA_FS_LOCALFS_ZFS].fs_maps = zfs_maps; + localfs[NETDATA_FS_LOCALFS_BTRFS].fs_maps = btrfs_maps; +} + /** * Filesystem thread * @@ -559,7 +677,7 @@ void *ebpf_filesystem_thread(void *ptr) netdata_thread_cleanup_push(ebpf_filesystem_exit, ptr); ebpf_module_t *em = (ebpf_module_t *)ptr; - em->maps = fs_maps; + ebpf_set_maps(); ebpf_update_filesystem(); // Initialize optional as zero, to identify when there are not partitions to monitor diff --git a/collectors/ebpf.plugin/ebpf_filesystem.h b/collectors/ebpf.plugin/ebpf_filesystem.h index cf19b253e177f3..b1126badb6b9ec 100644 --- a/collectors/ebpf.plugin/ebpf_filesystem.h +++ b/collectors/ebpf.plugin/ebpf_filesystem.h @@ -42,6 +42,16 @@ enum netdata_filesystem_table { NETDATA_ADDR_FS_TABLE }; +enum netdata_filesystem_localfs_idx { + NETDATA_FS_LOCALFS_EXT4, + NETDATA_FS_LOCALFS_XFS, + NETDATA_FS_LOCALFS_NFS, + NETDATA_FS_LOCALFS_ZFS, + NETDATA_FS_LOCALFS_BTRFS, + + NETDATA_FS_LOCALFS_END, +}; + void *ebpf_filesystem_thread(void *ptr); extern 
struct config fs_config; diff --git a/collectors/ebpf.plugin/ebpf_hardirq.c b/collectors/ebpf.plugin/ebpf_hardirq.c index b4d49dc00fd429..113648ec97b938 100644 --- a/collectors/ebpf.plugin/ebpf_hardirq.c +++ b/collectors/ebpf.plugin/ebpf_hardirq.c @@ -17,14 +17,20 @@ static ebpf_local_maps_t hardirq_maps[] = { .internal_input = NETDATA_HARDIRQ_MAX_IRQS, .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_HASH +#endif }, { .name = "tbl_hardirq_static", .internal_input = HARDIRQ_EBPF_STATIC_END, .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif }, /* end */ { @@ -32,7 +38,10 @@ static ebpf_local_maps_t hardirq_maps[] = { .internal_input = 0, .user_input = 0, .type = NETDATA_EBPF_MAP_CONTROLLER, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif } }; @@ -555,6 +564,9 @@ void *ebpf_hardirq_thread(void *ptr) goto endhardirq; } +#ifdef LIBBPF_MAJOR_VERSION + ebpf_define_map_type(em->maps, em->maps_per_core, running_on_kernel); +#endif em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); if (!em->probe_links) { goto endhardirq; diff --git a/collectors/ebpf.plugin/ebpf_mdflush.c b/collectors/ebpf.plugin/ebpf_mdflush.c index fc794e5e552df4..321bd97ee2dccd 100644 --- a/collectors/ebpf.plugin/ebpf_mdflush.c +++ b/collectors/ebpf.plugin/ebpf_mdflush.c @@ -16,7 +16,10 @@ static ebpf_local_maps_t mdflush_maps[] = { .internal_input = 1024, .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = 
BPF_MAP_TYPE_PERCPU_HASH +#endif }, /* end */ { @@ -87,7 +90,14 @@ static int mdflush_val_cmp(void *a, void *b) } } -static void mdflush_read_count_map() +/** + * Read count map + * + * Read the hash table and store data to allocated vectors. + * + * @param maps_per_core do I need to read all cores? + */ +static void mdflush_read_count_map(int maps_per_core) { int mapfd = mdflush_maps[MDFLUSH_MAP_COUNT].map_fd; mdflush_ebpf_key_t curr_key = (uint32_t)-1; @@ -137,7 +147,7 @@ static void mdflush_read_count_map() // we must add up count value for this record across all CPUs. uint64_t total_cnt = 0; int i; - int end = (running_on_kernel < NETDATA_KERNEL_V4_15) ? 1 : ebpf_nprocs; + int end = (!maps_per_core) ? 1 : ebpf_nprocs; for (i = 0; i < end; i++) { total_cnt += mdflush_ebpf_vals[i]; } @@ -215,6 +225,7 @@ static void mdflush_collector(ebpf_module_t *em) heartbeat_t hb; heartbeat_init(&hb); int counter = update_every - 1; + int maps_per_core = em->maps_per_core; while (!ebpf_exit_plugin) { (void)heartbeat_next(&hb, USEC_PER_SEC); @@ -222,7 +233,8 @@ static void mdflush_collector(ebpf_module_t *em) continue; counter = 0; - mdflush_read_count_map(); + mdflush_read_count_map(maps_per_core); + pthread_mutex_lock(&lock); // write dims now for all hitherto discovered devices. 
write_begin_chart("mdstat", "mdstat_flush"); avl_traverse_lock(&mdflush_pub, mdflush_write_dims, NULL); @@ -251,6 +263,9 @@ void *ebpf_mdflush_thread(void *ptr) goto endmdflush; } +#ifdef LIBBPF_MAJOR_VERSION + ebpf_define_map_type(em->maps, em->maps_per_core, running_on_kernel); +#endif em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); if (!em->probe_links) { goto endmdflush; diff --git a/collectors/ebpf.plugin/ebpf_mount.c b/collectors/ebpf.plugin/ebpf_mount.c index a2a4c55303e127..e0951f8c42abc7 100644 --- a/collectors/ebpf.plugin/ebpf_mount.c +++ b/collectors/ebpf.plugin/ebpf_mount.c @@ -5,10 +5,18 @@ static ebpf_local_maps_t mount_maps[] = {{.name = "tbl_mount", .internal_input = NETDATA_MOUNT_END, .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }, {.name = NULL, .internal_input = 0, .user_input = 0, .type = NETDATA_EBPF_MAP_CONTROLLER, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}}; + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }}; static char *mount_dimension_name[NETDATA_EBPF_MOUNT_SYSCALL] = { "mount", "umount" }; static netdata_syscall_stat_t mount_aggregated_data[NETDATA_EBPF_MOUNT_SYSCALL]; @@ -192,6 +200,8 @@ static inline int ebpf_mount_load_and_attach(struct mount_bpf *obj, ebpf_module_ ebpf_mount_disable_trampoline(obj); } + ebpf_update_map_type(obj->maps.tbl_mount, &mount_maps[NETDATA_KEY_MOUNT_TABLE]); + int ret = mount_bpf__load(obj); if (!ret) { if (test != EBPF_LOAD_PROBE && test != EBPF_LOAD_RETPROBE ) @@ -249,8 +259,10 @@ static void ebpf_mount_exit(void *ptr) * Read global table * * Read the table with number of calls for all functions + * + * @param maps_per_core do I need to read all cores? 
*/ -static void ebpf_mount_read_global_table() +static void ebpf_mount_read_global_table(int maps_per_core) { static netdata_idx_t *mount_values = NULL; if (!mount_values) @@ -259,17 +271,22 @@ static void ebpf_mount_read_global_table() uint32_t idx; netdata_idx_t *val = mount_hash_values; netdata_idx_t *stored = mount_values; + size_t length = sizeof(netdata_idx_t); + if (maps_per_core) + length *= ebpf_nprocs; + int fd = mount_maps[NETDATA_KEY_MOUNT_TABLE].map_fd; for (idx = NETDATA_KEY_MOUNT_CALL; idx < NETDATA_MOUNT_END; idx++) { if (!bpf_map_lookup_elem(fd, &idx, stored)) { int i; - int end = ebpf_nprocs; + int end = (maps_per_core) ? ebpf_nprocs : 1; netdata_idx_t total = 0; for (i = 0; i < end; i++) total += stored[i]; val[idx] = total; + memset(stored, 0, length); } } } @@ -304,13 +321,14 @@ static void mount_collector(ebpf_module_t *em) heartbeat_init(&hb); int update_every = em->update_every; int counter = update_every - 1; + int maps_per_core = em->maps_per_core; while (!ebpf_exit_plugin) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) continue; counter = 0; - ebpf_mount_read_global_table(); + ebpf_mount_read_global_table(maps_per_core); pthread_mutex_lock(&lock); ebpf_mount_send_data(); @@ -372,6 +390,10 @@ static void ebpf_create_mount_charts(int update_every) */ static int ebpf_mount_load_bpf(ebpf_module_t *em) { +#ifdef LIBBPF_MAJOR_VERSION + ebpf_define_map_type(em->maps, em->maps_per_core, running_on_kernel); +#endif + int ret = 0; if (em->load & EBPF_LOAD_LEGACY) { em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); diff --git a/collectors/ebpf.plugin/ebpf_oomkill.c b/collectors/ebpf.plugin/ebpf_oomkill.c index 856c922ec2dadf..0948752920b260 100644 --- a/collectors/ebpf.plugin/ebpf_oomkill.c +++ b/collectors/ebpf.plugin/ebpf_oomkill.c @@ -16,7 +16,10 @@ static ebpf_local_maps_t oomkill_maps[] = { .internal_input = NETDATA_OOMKILL_MAX_ENTRIES, .user_input = 
0, .type = NETDATA_EBPF_MAP_STATIC, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_HASH +#endif }, /* end */ { @@ -24,7 +27,10 @@ static ebpf_local_maps_t oomkill_maps[] = { .internal_input = 0, .user_input = 0, .type = NETDATA_EBPF_MAP_CONTROLLER, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_HASH +#endif } }; @@ -285,6 +291,8 @@ static void ebpf_update_oomkill_cgroup(int32_t *keys, uint32_t total) /** * Main loop for this collector. + * + * @param em the thread main structure. */ static void oomkill_collector(ebpf_module_t *em) { @@ -384,6 +392,9 @@ void *ebpf_oomkill_thread(void *ptr) goto endoomkill; } +#ifdef LIBBPF_MAJOR_VERSION + ebpf_define_map_type(em->maps, em->maps_per_core, running_on_kernel); +#endif em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); if (!em->probe_links) { goto endoomkill; diff --git a/collectors/ebpf.plugin/ebpf_process.c b/collectors/ebpf.plugin/ebpf_process.c index 66af47857f6d40..2878dbe2dba7c6 100644 --- a/collectors/ebpf.plugin/ebpf_process.c +++ b/collectors/ebpf.plugin/ebpf_process.c @@ -18,17 +18,33 @@ static char *status[] = { "process", "zombie" }; static ebpf_local_maps_t process_maps[] = {{.name = "tbl_pid_stats", .internal_input = ND_EBPF_DEFAULT_PID_SIZE, .user_input = 0, .type = NETDATA_EBPF_MAP_RESIZABLE | NETDATA_EBPF_MAP_PID, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_HASH +#endif + }, {.name = "tbl_total_stats", .internal_input = NETDATA_KEY_END_VECTOR, .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY 
+#endif + }, {.name = "process_ctrl", .internal_input = NETDATA_CONTROLLER_END, .user_input = 0, .type = NETDATA_EBPF_MAP_CONTROLLER, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }, {.name = NULL, .internal_input = 0, .user_input = 0, .type = NETDATA_EBPF_MAP_CONTROLLER, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}}; + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }}; char *tracepoint_sched_type = { "sched" } ; char *tracepoint_sched_process_exit = { "sched_process_exit" }; @@ -39,6 +55,7 @@ static int was_sched_process_exec_enabled = 0; static int was_sched_process_fork_enabled = 0; static netdata_idx_t *process_hash_values = NULL; +ebpf_process_stat_t *process_stat_vector = NULL; static netdata_syscall_stat_t process_aggregated_data[NETDATA_KEY_PUBLISH_PROCESS_END]; static netdata_publish_syscall_t process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_END]; @@ -55,6 +72,7 @@ static char *threads_stat[NETDATA_EBPF_THREAD_STAT_END] = {"total", "running"}; static char *load_event_stat[NETDATA_EBPF_LOAD_STAT_END] = {"legacy", "co-re"}; static char *memlock_stat = {"memory_locked"}; static char *hash_table_stat = {"hash_table"}; +static char *hash_table_core[NETDATA_EBPF_LOAD_STAT_END] = {"per_core", "unique"}; /***************************************************************** * @@ -251,8 +269,10 @@ void ebpf_process_send_apps_data(struct ebpf_target *root, ebpf_module_t *em) /** * Read the hash table and store data to allocated vectors. + * + * @param maps_per_core do I need to read all cores? 
*/ -static void read_hash_global_tables() +static void ebpf_read_process_hash_global_tables(int maps_per_core) { uint64_t idx; netdata_idx_t res[NETDATA_KEY_END_VECTOR]; @@ -263,7 +283,7 @@ static void read_hash_global_tables() if (!bpf_map_lookup_elem(fd, &idx, val)) { uint64_t total = 0; int i; - int end = ebpf_nprocs; + int end = (maps_per_core) ? ebpf_nprocs : 1; for (i = 0; i < end; i++) total += val[i]; @@ -285,13 +305,18 @@ static void read_hash_global_tables() /** * Update cgroup * - * Update cgroup data based in + * Update cgroup data based in PID running. + * + * @param maps_per_core do I need to read all cores? */ -static void ebpf_update_process_cgroup() +static void ebpf_update_process_cgroup(int maps_per_core) { ebpf_cgroup_target_t *ect ; int pid_fd = process_maps[NETDATA_PROCESS_PID_TABLE].map_fd; + size_t length = sizeof(ebpf_process_stat_t); + if (maps_per_core) + length *= ebpf_nprocs; pthread_mutex_lock(&mutex_cgroup_shm); for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { struct pid_on_target2 *pids; @@ -303,9 +328,15 @@ static void ebpf_update_process_cgroup() memcpy(out, in, sizeof(ebpf_process_stat_t)); } else { - if (bpf_map_lookup_elem(pid_fd, &pid, out)) { + if (bpf_map_lookup_elem(pid_fd, &pid, process_stat_vector)) { memset(out, 0, sizeof(ebpf_process_stat_t)); } + + ebpf_process_apps_accumulator(process_stat_vector, maps_per_core); + + memcpy(out, process_stat_vector, sizeof(ebpf_process_stat_t)); + + memset(process_stat_vector, 0, length); } } } @@ -506,6 +537,35 @@ static inline void ebpf_create_statistic_hash_tables(ebpf_module_t *em) ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); } +/** + * Create chart for percpu stats + * + * Write to standard output current values for threads. + * + * @param em a pointer to the structure with the default values. 
+ */ +static inline void ebpf_create_statistic_hash_per_core(ebpf_module_t *em) +{ + ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, + NETDATA_EBPF_HASH_TABLES_PER_CORE, + "How threads are loading hash/array tables.", + "threads", + NETDATA_EBPF_FAMILY, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 140004, + em->update_every, + NETDATA_EBPF_MODULE_NAME_PROCESS); + + ebpf_write_global_dimension(hash_table_core[NETDATA_EBPF_THREAD_PER_CORE], + hash_table_core[NETDATA_EBPF_THREAD_PER_CORE], + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); + + ebpf_write_global_dimension(hash_table_core[NETDATA_EBPF_THREAD_UNIQUE], + hash_table_core[NETDATA_EBPF_THREAD_UNIQUE], + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); +} + /** * Update Internal Metric variable * @@ -541,6 +601,8 @@ static void ebpf_create_statistic_charts(ebpf_module_t *em) ebpf_create_statistic_kernel_memory(em); ebpf_create_statistic_hash_tables(em); + + ebpf_create_statistic_hash_per_core(em); } /** @@ -647,6 +709,7 @@ static void ebpf_process_exit(void *ptr) ebpf_module_t *em = (ebpf_module_t *)ptr; freez(process_hash_values); + freez(process_stat_vector); ebpf_process_disable_tracepoints(); @@ -1010,6 +1073,11 @@ void ebpf_send_statistic_data() write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_HASH_TABLES_LOADED); write_chart_dimension(hash_table_stat, (long long)plugin_statistics.hash_tables); write_end_chart(); + + write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_HASH_TABLES_PER_CORE); + write_chart_dimension(hash_table_core[NETDATA_EBPF_THREAD_PER_CORE], (long long)plugin_statistics.hash_percpu); + write_chart_dimension(hash_table_core[NETDATA_EBPF_THREAD_UNIQUE], (long long)plugin_statistics.hash_unique); + write_end_chart(); } /** @@ -1032,6 +1100,7 @@ static void process_collector(ebpf_module_t *em) int update_every = em->update_every; int counter = update_every - 1; + int maps_per_core = em->maps_per_core; while (!ebpf_exit_plugin) { usec_t dt = heartbeat_next(&hb, USEC_PER_SEC); (void)dt; 
@@ -1041,14 +1110,14 @@ static void process_collector(ebpf_module_t *em) if (++counter == update_every) { counter = 0; - read_hash_global_tables(); + ebpf_read_process_hash_global_tables(maps_per_core); netdata_apps_integration_flags_t apps_enabled = em->apps_charts; pthread_mutex_lock(&collect_data_mutex); if (ebpf_all_pids_count > 0) { if (cgroups && shm_ebpf_cgroup.header) { - ebpf_update_process_cgroup(); + ebpf_update_process_cgroup(maps_per_core); } } @@ -1099,6 +1168,7 @@ static void ebpf_process_allocate_global_vectors(size_t length) memset(process_aggregated_data, 0, length * sizeof(netdata_syscall_stat_t)); memset(process_publish_aggregated, 0, length * sizeof(netdata_publish_syscall_t)); process_hash_values = callocz(ebpf_nprocs, sizeof(netdata_idx_t)); + process_stat_vector = callocz(ebpf_nprocs, sizeof(ebpf_process_stat_t)); global_process_stats = callocz((size_t)pid_max, sizeof(ebpf_process_stat_t *)); } diff --git a/collectors/ebpf.plugin/ebpf_process.h b/collectors/ebpf.plugin/ebpf_process.h index 5f119aea1f2b65..bccdc0eb5eee9d 100644 --- a/collectors/ebpf.plugin/ebpf_process.h +++ b/collectors/ebpf.plugin/ebpf_process.h @@ -56,6 +56,13 @@ enum netdata_ebpf_load_mode_stats{ NETDATA_EBPF_LOAD_STAT_END }; +enum netdata_ebpf_thread_per_core{ + NETDATA_EBPF_THREAD_PER_CORE, + NETDATA_EBPF_THREAD_UNIQUE, + + NETDATA_EBPF_PER_CORE_END +}; + // Index from kernel typedef enum ebpf_process_index { NETDATA_KEY_CALLS_DO_EXIT, diff --git a/collectors/ebpf.plugin/ebpf_shm.c b/collectors/ebpf.plugin/ebpf_shm.c index f81c01964f015c..093d65b6025891 100644 --- a/collectors/ebpf.plugin/ebpf_shm.c +++ b/collectors/ebpf.plugin/ebpf_shm.c @@ -21,15 +21,27 @@ struct config shm_config = { .first_section = NULL, static ebpf_local_maps_t shm_maps[] = {{.name = "tbl_pid_shm", .internal_input = ND_EBPF_DEFAULT_PID_SIZE, .user_input = 0, .type = NETDATA_EBPF_MAP_RESIZABLE | NETDATA_EBPF_MAP_PID, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + .map_fd = 
ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_HASH +#endif + }, {.name = "shm_ctrl", .internal_input = NETDATA_CONTROLLER_END, .user_input = 0, .type = NETDATA_EBPF_MAP_CONTROLLER, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }, {.name = "tbl_shm", .internal_input = NETDATA_SHM_END, .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }, {.name = NULL, .internal_input = 0, .user_input = 0}}; netdata_ebpf_targets_t shm_targets[] = { {.name = "shmget", .mode = EBPF_LOAD_TRAMPOLINE}, @@ -215,10 +227,14 @@ static void ebpf_shm_disable_release_task(struct shm_bpf *obj) * @param obj is the main structure for bpf objects. * @param em structure with configuration */ -static void ebpf_shm_adjust_map_size(struct shm_bpf *obj, ebpf_module_t *em) +static void ebpf_shm_adjust_map(struct shm_bpf *obj, ebpf_module_t *em) { ebpf_update_map_size(obj->maps.tbl_pid_shm, &shm_maps[NETDATA_PID_SHM_TABLE], em, bpf_map__name(obj->maps.tbl_pid_shm)); + + ebpf_update_map_type(obj->maps.tbl_shm, &shm_maps[NETDATA_SHM_GLOBAL_TABLE]); + ebpf_update_map_type(obj->maps.tbl_pid_shm, &shm_maps[NETDATA_PID_SHM_TABLE]); + ebpf_update_map_type(obj->maps.shm_ctrl, &shm_maps[NETDATA_SHM_CONTROLLER]); } /** @@ -250,7 +266,7 @@ static inline int ebpf_shm_load_and_attach(struct shm_bpf *obj, ebpf_module_t *e ebpf_disable_trampoline(obj); } - ebpf_shm_adjust_map_size(obj, em); + ebpf_shm_adjust_map(obj, em); if (!em->apps_charts && !em->cgroup_charts) ebpf_shm_disable_release_task(obj); @@ -312,10 +328,11 @@ static void ebpf_shm_exit(void *ptr) * Sum all values read from kernel and store in the first address. * * @param out the vector with read values. 
+ * @param maps_per_core do I need to read all cores? */ -static void shm_apps_accumulator(netdata_publish_shm_t *out) +static void shm_apps_accumulator(netdata_publish_shm_t *out, int maps_per_core) { - int i, end = (running_on_kernel >= NETDATA_KERNEL_V4_15) ? ebpf_nprocs : 1; + int i, end = (maps_per_core) ? ebpf_nprocs : 1; netdata_publish_shm_t *total = &out[0]; for (i = 1; i < end; i++) { netdata_publish_shm_t *w = &out[i]; @@ -349,12 +366,17 @@ static void shm_fill_pid(uint32_t current_pid, netdata_publish_shm_t *publish) * Update cgroup * * Update cgroup data based in + * + * @param maps_per_core do I need to read all cores? */ -static void ebpf_update_shm_cgroup() +static void ebpf_update_shm_cgroup(int maps_per_core) { netdata_publish_shm_t *cv = shm_vector; int fd = shm_maps[NETDATA_PID_SHM_TABLE].map_fd; - size_t length = sizeof(netdata_publish_shm_t) * ebpf_nprocs; + size_t length = sizeof(netdata_publish_shm_t); + if (maps_per_core) + length *= ebpf_nprocs; + ebpf_cgroup_target_t *ect; memset(cv, 0, length); @@ -371,7 +393,7 @@ static void ebpf_update_shm_cgroup() memcpy(out, in, sizeof(netdata_publish_shm_t)); } else { if (!bpf_map_lookup_elem(fd, &pid, cv)) { - shm_apps_accumulator(cv); + shm_apps_accumulator(cv, maps_per_core); memcpy(out, cv, sizeof(netdata_publish_shm_t)); @@ -389,14 +411,19 @@ static void ebpf_update_shm_cgroup() * Read APPS table * * Read the apps table and store data inside the structure. + * + * @param maps_per_core do I need to read all cores? 
*/ -static void read_apps_table() +static void read_shm_apps_table(int maps_per_core) { netdata_publish_shm_t *cv = shm_vector; uint32_t key; struct ebpf_pid_stat *pids = ebpf_root_of_pids; int fd = shm_maps[NETDATA_PID_SHM_TABLE].map_fd; - size_t length = sizeof(netdata_publish_shm_t)*ebpf_nprocs; + size_t length = sizeof(netdata_publish_shm_t); + if (maps_per_core) + length *= ebpf_nprocs; + while (pids) { key = pids->pid; @@ -405,7 +432,7 @@ static void read_apps_table() continue; } - shm_apps_accumulator(cv); + shm_apps_accumulator(cv, maps_per_core); shm_fill_pid(key, cv); @@ -446,23 +473,29 @@ static void shm_send_global() * Read global counter * * Read the table with number of calls for all functions + * + * @param maps_per_core do I need to read all cores? */ -static void ebpf_shm_read_global_table() +static void ebpf_shm_read_global_table(int maps_per_core) { netdata_idx_t *stored = shm_values; netdata_idx_t *val = shm_hash_values; int fd = shm_maps[NETDATA_SHM_GLOBAL_TABLE].map_fd; + size_t length = sizeof(netdata_idx_t); + if (maps_per_core) + length *= ebpf_nprocs; uint32_t i, end = NETDATA_SHM_END; for (i = NETDATA_KEY_SHMGET_CALL; i < end; i++) { if (!bpf_map_lookup_elem(fd, &i, stored)) { int j; - int last = ebpf_nprocs; + int last = (maps_per_core) ? 
ebpf_nprocs : 1; netdata_idx_t total = 0; for (j = 0; j < last; j++) total += stored[j]; val[i] = total; + memset(stored, 0 , length); } } } @@ -831,6 +864,7 @@ static void shm_collector(ebpf_module_t *em) heartbeat_t hb; heartbeat_init(&hb); int counter = update_every - 1; + int maps_per_core = em->maps_per_core; while (!ebpf_exit_plugin) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) @@ -838,14 +872,14 @@ static void shm_collector(ebpf_module_t *em) counter = 0; netdata_apps_integration_flags_t apps = em->apps_charts; - ebpf_shm_read_global_table(); + ebpf_shm_read_global_table(maps_per_core); pthread_mutex_lock(&collect_data_mutex); if (apps) { - read_apps_table(); + read_shm_apps_table(maps_per_core); } if (cgroups) { - ebpf_update_shm_cgroup(); + ebpf_update_shm_cgroup(maps_per_core); } pthread_mutex_lock(&lock); @@ -984,6 +1018,10 @@ static void ebpf_create_shm_charts(int update_every) */ static int ebpf_shm_load_bpf(ebpf_module_t *em) { +#ifdef LIBBPF_MAJOR_VERSION + ebpf_define_map_type(em->maps, em->maps_per_core, running_on_kernel); +#endif + int ret = 0; ebpf_adjust_apps_cgroup(em, em->targets[NETDATA_KEY_SHMGET_CALL].mode); diff --git a/collectors/ebpf.plugin/ebpf_socket.c b/collectors/ebpf.plugin/ebpf_socket.c index aebc9ca122c548..b45dec7d9e3c7b 100644 --- a/collectors/ebpf.plugin/ebpf_socket.c +++ b/collectors/ebpf.plugin/ebpf_socket.c @@ -27,35 +27,67 @@ static ebpf_local_maps_t socket_maps[] = {{.name = "tbl_bandwidth", .internal_input = NETDATA_COMPILED_CONNECTIONS_ALLOWED, .user_input = NETDATA_MAXIMUM_CONNECTIONS_ALLOWED, .type = NETDATA_EBPF_MAP_RESIZABLE | NETDATA_EBPF_MAP_PID, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_HASH +#endif + }, {.name = "tbl_global_sock", .internal_input = NETDATA_SOCKET_COUNTER, .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, - .map_fd = 
ND_EBPF_MAP_FD_NOT_INITIALIZED}, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }, {.name = "tbl_lports", .internal_input = NETDATA_SOCKET_COUNTER, .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_HASH +#endif + }, {.name = "tbl_conn_ipv4", .internal_input = NETDATA_COMPILED_CONNECTIONS_ALLOWED, .user_input = NETDATA_MAXIMUM_CONNECTIONS_ALLOWED, .type = NETDATA_EBPF_MAP_STATIC, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_HASH +#endif + }, {.name = "tbl_conn_ipv6", .internal_input = NETDATA_COMPILED_CONNECTIONS_ALLOWED, .user_input = NETDATA_MAXIMUM_CONNECTIONS_ALLOWED, .type = NETDATA_EBPF_MAP_STATIC, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_HASH +#endif + }, {.name = "tbl_nv_udp", .internal_input = NETDATA_COMPILED_UDP_CONNECTIONS_ALLOWED, .user_input = NETDATA_MAXIMUM_UDP_CONNECTIONS_ALLOWED, .type = NETDATA_EBPF_MAP_STATIC, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_HASH +#endif + }, {.name = "socket_ctrl", .internal_input = NETDATA_CONTROLLER_END, .user_input = 0, .type = NETDATA_EBPF_MAP_CONTROLLER, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, - {.name = NULL, .internal_input = 0, .user_input = 0}}; + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }, + {.name = NULL, .internal_input = 0, .user_input = 0, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }}; static netdata_idx_t *socket_hash_values = NULL; static 
netdata_syscall_stat_t socket_aggregated_data[NETDATA_MAX_SOCKET_VECTOR]; @@ -362,7 +394,7 @@ static void ebpf_socket_set_hash_tables(struct socket_bpf *obj) * @param obj is the main structure for bpf objects. * @param em structure with configuration */ -static void ebpf_socket_adjust_map_size(struct socket_bpf *obj, ebpf_module_t *em) +static void ebpf_socket_adjust_map(struct socket_bpf *obj, ebpf_module_t *em) { ebpf_update_map_size(obj->maps.tbl_bandwidth, &socket_maps[NETDATA_SOCKET_TABLE_BANDWIDTH], em, bpf_map__name(obj->maps.tbl_bandwidth)); @@ -375,6 +407,15 @@ static void ebpf_socket_adjust_map_size(struct socket_bpf *obj, ebpf_module_t *e ebpf_update_map_size(obj->maps.tbl_nv_udp, &socket_maps[NETDATA_SOCKET_TABLE_UDP], em, bpf_map__name(obj->maps.tbl_nv_udp)); + + + ebpf_update_map_type(obj->maps.tbl_bandwidth, &socket_maps[NETDATA_SOCKET_TABLE_BANDWIDTH]); + ebpf_update_map_type(obj->maps.tbl_conn_ipv4, &socket_maps[NETDATA_SOCKET_TABLE_IPV4]); + ebpf_update_map_type(obj->maps.tbl_conn_ipv6, &socket_maps[NETDATA_SOCKET_TABLE_IPV6]); + ebpf_update_map_type(obj->maps.tbl_nv_udp, &socket_maps[NETDATA_SOCKET_TABLE_UDP]); + ebpf_update_map_type(obj->maps.socket_ctrl, &socket_maps[NETDATA_SOCKET_TABLE_CTRL]); + ebpf_update_map_type(obj->maps.tbl_global_sock, &socket_maps[NETDATA_SOCKET_GLOBAL]); + ebpf_update_map_type(obj->maps.tbl_lports, &socket_maps[NETDATA_SOCKET_LPORTS]); } /** @@ -403,14 +444,14 @@ static inline int ebpf_socket_load_and_attach(struct socket_bpf *obj, ebpf_modul ebpf_socket_disable_specific_probe(obj, em->mode); } + ebpf_socket_adjust_map(obj, em); + int ret = socket_bpf__load(obj); if (ret) { fprintf(stderr, "failed to load BPF object: %d\n", ret); return ret; } - ebpf_socket_adjust_map_size(obj, em); - if (test == EBPF_LOAD_TRAMPOLINE) { ret = socket_bpf__attach(obj); } else { @@ -1988,17 +2029,23 @@ static void hash_accumulator(netdata_socket_t *values, netdata_socket_idx_t *key * * @param fd the hash table with data. 
* @param family the family associated to the hash table + * @param maps_per_core do I need to read all cores? * * @return it returns 0 on success and -1 otherwise. */ -static void ebpf_read_socket_hash_table(int fd, int family) +static void ebpf_read_socket_hash_table(int fd, int family, int maps_per_core) { netdata_socket_idx_t key = {}; netdata_socket_idx_t next_key = {}; netdata_socket_t *values = socket_values; - size_t length = ebpf_nprocs*sizeof(netdata_socket_t); - int test, end = (running_on_kernel < NETDATA_KERNEL_V4_15) ? 1 : ebpf_nprocs; + size_t length = sizeof(netdata_socket_t); + int test, end; + if (maps_per_core) { + length *= ebpf_nprocs; + end = ebpf_nprocs; + } else + end = 1; while (bpf_map_get_next_key(fd, &key, &next_key) == 0) { // We need to reset the values when we are working on kernel 4.15 or newer, because kernel does not create @@ -2122,11 +2169,13 @@ static void read_listen_table() void *ebpf_socket_read_hash(void *ptr) { netdata_thread_cleanup_push(ebpf_socket_cleanup, ptr); + ebpf_module_t *em = (ebpf_module_t *)ptr; heartbeat_t hb; heartbeat_init(&hb); int fd_ipv4 = socket_maps[NETDATA_SOCKET_TABLE_IPV4].map_fd; int fd_ipv6 = socket_maps[NETDATA_SOCKET_TABLE_IPV6].map_fd; + int maps_per_core = em->maps_per_core; // This thread is cancelled from another thread for (;;) { (void)heartbeat_next(&hb, USEC_PER_SEC); @@ -2134,8 +2183,8 @@ void *ebpf_socket_read_hash(void *ptr) break; pthread_mutex_lock(&nv_mutex); - ebpf_read_socket_hash_table(fd_ipv4, AF_INET); - ebpf_read_socket_hash_table(fd_ipv6, AF_INET6); + ebpf_read_socket_hash_table(fd_ipv4, AF_INET, maps_per_core); + ebpf_read_socket_hash_table(fd_ipv6, AF_INET6, maps_per_core); pthread_mutex_unlock(&nv_mutex); } @@ -2145,23 +2194,30 @@ void *ebpf_socket_read_hash(void *ptr) /** * Read the hash table and store data to allocated vectors. + * + * @param maps_per_core do I need to read all cores? 
*/ -static void read_hash_global_tables() +static void read_hash_global_tables(int maps_per_core) { uint64_t idx; netdata_idx_t res[NETDATA_SOCKET_COUNTER]; netdata_idx_t *val = socket_hash_values; + size_t length = sizeof(netdata_idx_t); + if (maps_per_core) + length *= ebpf_nprocs; + int fd = socket_maps[NETDATA_SOCKET_GLOBAL].map_fd; for (idx = 0; idx < NETDATA_SOCKET_COUNTER; idx++) { if (!bpf_map_lookup_elem(fd, &idx, val)) { uint64_t total = 0; int i; - int end = ebpf_nprocs; + int end = (maps_per_core) ? ebpf_nprocs : 1; for (i = 0; i < end; i++) total += val[i]; res[idx] = total; + memset(socket_hash_values, 0, length); } else { res[idx] = 0; } @@ -2220,9 +2276,9 @@ void ebpf_socket_fill_publish_apps(uint32_t current_pid, ebpf_bandwidth_t *eb) * * @param out the vector with the values to sum */ -void ebpf_socket_bandwidth_accumulator(ebpf_bandwidth_t *out) +void ebpf_socket_bandwidth_accumulator(ebpf_bandwidth_t *out, int maps_per_core) { - int i, end = (running_on_kernel >= NETDATA_KERNEL_V4_15) ? ebpf_nprocs : 1; + int i, end = (maps_per_core) ? ebpf_nprocs : 1; ebpf_bandwidth_t *total = &out[0]; for (i = 1; i < end; i++) { ebpf_bandwidth_t *move = &out[i]; @@ -2241,13 +2297,18 @@ void ebpf_socket_bandwidth_accumulator(ebpf_bandwidth_t *out) /** * Update the apps data reading information from the hash table + * + * @param maps_per_core do I need to read all cores? 
*/ -static void ebpf_socket_update_apps_data() +static void ebpf_socket_update_apps_data(int maps_per_core) { int fd = socket_maps[NETDATA_SOCKET_TABLE_BANDWIDTH].map_fd; ebpf_bandwidth_t *eb = bandwidth_vector; uint32_t key; struct ebpf_pid_stat *pids = ebpf_root_of_pids; + size_t length = sizeof(ebpf_bandwidth_t); + if (maps_per_core) + length *= ebpf_nprocs; while (pids) { key = pids->pid; @@ -2256,10 +2317,12 @@ static void ebpf_socket_update_apps_data() continue; } - ebpf_socket_bandwidth_accumulator(eb); + ebpf_socket_bandwidth_accumulator(eb, maps_per_core); ebpf_socket_fill_publish_apps(key, eb); + memset(eb, 0, length); + pids = pids->next; } } @@ -2267,15 +2330,21 @@ static void ebpf_socket_update_apps_data() /** * Update cgroup * - * Update cgroup data based in + * Update cgroup data based in PIDs. + * + * @param maps_per_core do I need to read all cores? */ -static void ebpf_update_socket_cgroup() +static void ebpf_update_socket_cgroup(int maps_per_core) { ebpf_cgroup_target_t *ect ; ebpf_bandwidth_t *eb = bandwidth_vector; int fd = socket_maps[NETDATA_SOCKET_TABLE_BANDWIDTH].map_fd; + size_t length = sizeof(ebpf_bandwidth_t); + if (maps_per_core) + length *= ebpf_nprocs; + pthread_mutex_lock(&mutex_cgroup_shm); for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { struct pid_on_target2 *pids; @@ -2298,7 +2367,7 @@ static void ebpf_update_socket_cgroup() publish->call_tcp_v6_connection = in->call_tcp_v6_connection; } else { if (!bpf_map_lookup_elem(fd, &pid, eb)) { - ebpf_socket_bandwidth_accumulator(eb); + ebpf_socket_bandwidth_accumulator(eb, maps_per_core); memcpy(out, eb, sizeof(ebpf_bandwidth_t)); @@ -2312,6 +2381,8 @@ static void ebpf_update_socket_cgroup() publish->call_close = out->close; publish->call_tcp_v4_connection = out->tcp_v4_connection; publish->call_tcp_v6_connection = out->tcp_v6_connection; + + memset(eb, 0, length); } } } @@ -2845,6 +2916,7 @@ static void socket_collector(ebpf_module_t *em) int socket_global_enabled = 
em->global_charts; int update_every = em->update_every; + int maps_per_core = em->maps_per_core; int counter = update_every - 1; while (!ebpf_exit_plugin) { (void)heartbeat_next(&hb, USEC_PER_SEC); @@ -2855,15 +2927,15 @@ static void socket_collector(ebpf_module_t *em) netdata_apps_integration_flags_t socket_apps_enabled = em->apps_charts; if (socket_global_enabled) { read_listen_table(); - read_hash_global_tables(); + read_hash_global_tables(maps_per_core); } pthread_mutex_lock(&collect_data_mutex); if (socket_apps_enabled) - ebpf_socket_update_apps_data(); + ebpf_socket_update_apps_data(maps_per_core); if (cgroups) - ebpf_update_socket_cgroup(); + ebpf_update_socket_cgroup(maps_per_core); if (network_connection) calculate_nv_plot(); @@ -3855,6 +3927,10 @@ void parse_table_size_options(struct config *cfg) */ static int ebpf_socket_load_bpf(ebpf_module_t *em) { +#ifdef LIBBPF_MAJOR_VERSION + ebpf_define_map_type(em->maps, em->maps_per_core, running_on_kernel); +#endif + int ret = 0; if (em->load & EBPF_LOAD_LEGACY) { diff --git a/collectors/ebpf.plugin/ebpf_softirq.c b/collectors/ebpf.plugin/ebpf_softirq.c index 33abbdf5ea1f0c..01e2d0a5284af7 100644 --- a/collectors/ebpf.plugin/ebpf_softirq.c +++ b/collectors/ebpf.plugin/ebpf_softirq.c @@ -16,7 +16,10 @@ static ebpf_local_maps_t softirq_maps[] = { .internal_input = NETDATA_SOFTIRQ_MAX_IRQS, .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif }, /* end */ { @@ -24,7 +27,10 @@ static ebpf_local_maps_t softirq_maps[] = { .internal_input = 0, .user_input = 0, .type = NETDATA_EBPF_MAP_CONTROLLER, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif } }; @@ -94,10 +100,21 @@ static void softirq_cleanup(void *ptr) * MAIN LOOP 
*****************************************************************/ -static void softirq_read_latency_map() +/** + * Read Latency Map + * + * Read data from kernel ring to plot for users. + * + * @param maps_per_core do I need to read all cores? + */ +static void softirq_read_latency_map(int maps_per_core) { int fd = softirq_maps[SOFTIRQ_MAP_LATENCY].map_fd; int i; + size_t length = sizeof(softirq_ebpf_val_t); + if (maps_per_core) + length *= ebpf_nprocs; + for (i = 0; i < NETDATA_SOFTIRQ_MAX_IRQS; i++) { int test = bpf_map_lookup_elem(fd, &i, softirq_ebpf_vals); if (unlikely(test < 0)) { @@ -106,12 +123,13 @@ static void softirq_read_latency_map() uint64_t total_latency = 0; int cpu_i; - int end = ebpf_nprocs; + int end = (maps_per_core) ? ebpf_nprocs : 1; for (cpu_i = 0; cpu_i < end; cpu_i++) { total_latency += softirq_ebpf_vals[cpu_i].latency/1000; } softirq_vals[i].latency = total_latency; + memset(softirq_ebpf_vals, 0, length); } } @@ -172,6 +190,7 @@ static void softirq_collector(ebpf_module_t *em) heartbeat_init(&hb); int update_every = em->update_every; int counter = update_every - 1; + int maps_per_core = em->maps_per_core; //This will be cancelled by its parent while (!ebpf_exit_plugin) { (void)heartbeat_next(&hb, USEC_PER_SEC); @@ -179,7 +198,7 @@ static void softirq_collector(ebpf_module_t *em) continue; counter = 0; - softirq_read_latency_map(); + softirq_read_latency_map(maps_per_core); pthread_mutex_lock(&lock); // write dims now for all hitherto discovered IRQs. 
@@ -212,6 +231,9 @@ void *ebpf_softirq_thread(void *ptr) goto endsoftirq; } +#ifdef LIBBPF_MAJOR_VERSION + ebpf_define_map_type(em->maps, em->maps_per_core, running_on_kernel); +#endif em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); if (!em->probe_links) { goto endsoftirq; diff --git a/collectors/ebpf.plugin/ebpf_swap.c b/collectors/ebpf.plugin/ebpf_swap.c index 5dcf93f3d39353..c9129a3fae9962 100644 --- a/collectors/ebpf.plugin/ebpf_swap.c +++ b/collectors/ebpf.plugin/ebpf_swap.c @@ -21,16 +21,32 @@ struct config swap_config = { .first_section = NULL, static ebpf_local_maps_t swap_maps[] = {{.name = "tbl_pid_swap", .internal_input = ND_EBPF_DEFAULT_PID_SIZE, .user_input = 0, .type = NETDATA_EBPF_MAP_RESIZABLE | NETDATA_EBPF_MAP_PID, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_HASH +#endif + }, {.name = "swap_ctrl", .internal_input = NETDATA_CONTROLLER_END, .user_input = 0, .type = NETDATA_EBPF_MAP_CONTROLLER, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }, {.name = "tbl_swap", .internal_input = NETDATA_SWAP_END, .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, - {.name = NULL, .internal_input = 0, .user_input = 0}}; + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }, + {.name = NULL, .internal_input = 0, .user_input = 0, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }}; netdata_ebpf_targets_t swap_targets[] = { {.name = "swap_readpage", .mode = EBPF_LOAD_TRAMPOLINE}, {.name = "swap_writepage", .mode = EBPF_LOAD_TRAMPOLINE}, @@ -133,17 +149,21 @@ static void ebpf_swap_set_hash_tables(struct swap_bpf *obj) } /** - * Adjust Map Size + * Adjust 
Map * * Resize maps according input from users. * * @param obj is the main structure for bpf objects. * @param em structure with configuration */ -static void ebpf_swap_adjust_map_size(struct swap_bpf *obj, ebpf_module_t *em) +static void ebpf_swap_adjust_map(struct swap_bpf *obj, ebpf_module_t *em) { ebpf_update_map_size(obj->maps.tbl_pid_swap, &swap_maps[NETDATA_PID_SWAP_TABLE], em, bpf_map__name(obj->maps.tbl_pid_swap)); + + ebpf_update_map_type(obj->maps.tbl_pid_swap, &swap_maps[NETDATA_PID_SWAP_TABLE]); + ebpf_update_map_type(obj->maps.tbl_swap, &swap_maps[NETDATA_SWAP_GLOBAL_TABLE]); + ebpf_update_map_type(obj->maps.swap_ctrl, &swap_maps[NETDATA_SWAP_CONTROLLER]); } /** @@ -182,7 +202,7 @@ static inline int ebpf_swap_load_and_attach(struct swap_bpf *obj, ebpf_module_t ebpf_swap_disable_trampoline(obj); } - ebpf_swap_adjust_map_size(obj, em); + ebpf_swap_adjust_map(obj, em); if (!em->apps_charts && !em->cgroup_charts) ebpf_swap_disable_release_task(obj); @@ -251,10 +271,11 @@ static void ebpf_swap_exit(void *ptr) * Sum all values read from kernel and store in the first address. * * @param out the vector with read values. + * @param maps_per_core do I need to read all cores? */ -static void swap_apps_accumulator(netdata_publish_swap_t *out) +static void swap_apps_accumulator(netdata_publish_swap_t *out, int maps_per_core) { - int i, end = (running_on_kernel >= NETDATA_KERNEL_V4_15) ? ebpf_nprocs : 1; + int i, end = (maps_per_core) ? ebpf_nprocs : 1; netdata_publish_swap_t *total = &out[0]; for (i = 1; i < end; i++) { netdata_publish_swap_t *w = &out[i]; @@ -286,13 +307,17 @@ static void swap_fill_pid(uint32_t current_pid, netdata_publish_swap_t *publish) * Update cgroup * * Update cgroup data based in + * + * @param maps_per_core do I need to read all cores? 
*/ -static void ebpf_update_swap_cgroup() +static void ebpf_update_swap_cgroup(int maps_per_core) { ebpf_cgroup_target_t *ect ; netdata_publish_swap_t *cv = swap_vector; int fd = swap_maps[NETDATA_PID_SWAP_TABLE].map_fd; - size_t length = sizeof(netdata_publish_swap_t)*ebpf_nprocs; + size_t length = sizeof(netdata_publish_swap_t); + if (maps_per_core) + length *= ebpf_nprocs; pthread_mutex_lock(&mutex_cgroup_shm); for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { struct pid_on_target2 *pids; @@ -306,9 +331,12 @@ static void ebpf_update_swap_cgroup() } else { memset(cv, 0, length); if (!bpf_map_lookup_elem(fd, &pid, cv)) { - swap_apps_accumulator(cv); + swap_apps_accumulator(cv, maps_per_core); memcpy(out, cv, sizeof(netdata_publish_swap_t)); + + // We are cleaning to avoid passing data read from one process to other. + memset(cv, 0, length); } } } @@ -320,14 +348,18 @@ static void ebpf_update_swap_cgroup() * Read APPS table * * Read the apps table and store data inside the structure. + * + * @param maps_per_core do I need to read all cores? */ -static void read_apps_table() +static void read_swap_apps_table(int maps_per_core) { netdata_publish_swap_t *cv = swap_vector; uint32_t key; struct ebpf_pid_stat *pids = ebpf_root_of_pids; int fd = swap_maps[NETDATA_PID_SWAP_TABLE].map_fd; - size_t length = sizeof(netdata_publish_swap_t)*ebpf_nprocs; + size_t length = sizeof(netdata_publish_swap_t); + if (maps_per_core) + length *= ebpf_nprocs; while (pids) { key = pids->pid; @@ -336,7 +368,7 @@ static void read_apps_table() continue; } - swap_apps_accumulator(cv); + swap_apps_accumulator(cv, maps_per_core); swap_fill_pid(key, cv); @@ -365,8 +397,10 @@ static void swap_send_global() * Read global counter * * Read the table with number of calls to all functions + * + * @param maps_per_core do I need to read all cores? 
*/ -static void ebpf_swap_read_global_table() +static void ebpf_swap_read_global_table(int maps_per_core) { netdata_idx_t *stored = swap_values; netdata_idx_t *val = swap_hash_values; @@ -376,7 +410,7 @@ static void ebpf_swap_read_global_table() for (i = NETDATA_KEY_SWAP_READPAGE_CALL; i < end; i++) { if (!bpf_map_lookup_elem(fd, &i, stored)) { int j; - int last = ebpf_nprocs; + int last = (maps_per_core) ? ebpf_nprocs : 1; netdata_idx_t total = 0; for (j = 0; j < last; j++) total += stored[j]; @@ -646,6 +680,7 @@ static void swap_collector(ebpf_module_t *em) heartbeat_t hb; heartbeat_init(&hb); int counter = update_every - 1; + int maps_per_core = em->maps_per_core; while (!ebpf_exit_plugin) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) @@ -653,13 +688,13 @@ static void swap_collector(ebpf_module_t *em) counter = 0; netdata_apps_integration_flags_t apps = em->apps_charts; - ebpf_swap_read_global_table(); + ebpf_swap_read_global_table(maps_per_core); pthread_mutex_lock(&collect_data_mutex); if (apps) - read_apps_table(); + read_swap_apps_table(maps_per_core); if (cgroup) - ebpf_update_swap_cgroup(); + ebpf_update_swap_cgroup(maps_per_core); pthread_mutex_lock(&lock); @@ -767,6 +802,10 @@ static void ebpf_create_swap_charts(int update_every) */ static int ebpf_swap_load_bpf(ebpf_module_t *em) { +#ifdef LIBBPF_MAJOR_VERSION + ebpf_define_map_type(em->maps, em->maps_per_core, running_on_kernel); +#endif + int ret = 0; ebpf_adjust_apps_cgroup(em, em->targets[NETDATA_KEY_SWAP_READPAGE_CALL].mode); if (em->load & EBPF_LOAD_LEGACY) { diff --git a/collectors/ebpf.plugin/ebpf_sync.c b/collectors/ebpf.plugin/ebpf_sync.c index f838b65af07245..66e9c742c54375 100644 --- a/collectors/ebpf.plugin/ebpf_sync.c +++ b/collectors/ebpf.plugin/ebpf_sync.c @@ -10,27 +10,95 @@ static netdata_publish_syscall_t sync_counter_publish_aggregated[NETDATA_SYNC_ID static netdata_idx_t sync_hash_values[NETDATA_SYNC_IDX_END]; -static ebpf_local_maps_t 
sync_maps[] = {{.name = "tbl_sync", .internal_input = NETDATA_SYNC_END, - .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, - {.name = "tbl_syncfs", .internal_input = NETDATA_SYNC_END, - .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, - {.name = "tbl_msync", .internal_input = NETDATA_SYNC_END, - .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, - {.name = "tbl_fsync", .internal_input = NETDATA_SYNC_END, - .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, - {.name = "tbl_fdatasync", .internal_input = NETDATA_SYNC_END, - .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, - {.name = "tbl_syncfr", .internal_input = NETDATA_SYNC_END, - .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, - {.name = NULL, .internal_input = 0, .user_input = 0, - .type = NETDATA_EBPF_MAP_CONTROLLER, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}}; +ebpf_local_maps_t sync_maps[] = {{.name = "tbl_sync", .internal_input = NETDATA_SYNC_END, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }, + {.name = NULL, .internal_input = 0, .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }}; + +ebpf_local_maps_t syncfs_maps[] = {{.name = "tbl_syncfs", .internal_input = NETDATA_SYNC_END, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }, + {.name = NULL, .internal_input = 0, .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = 
ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }}; + +ebpf_local_maps_t msync_maps[] = {{.name = "tbl_msync", .internal_input = NETDATA_SYNC_END, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }, + {.name = NULL, .internal_input = 0, .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }}; + +ebpf_local_maps_t fsync_maps[] = {{.name = "tbl_fsync", .internal_input = NETDATA_SYNC_END, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }, + {.name = NULL, .internal_input = 0, .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }}; + +ebpf_local_maps_t fdatasync_maps[] = {{.name = "tbl_fdatasync", .internal_input = NETDATA_SYNC_END, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }, + {.name = NULL, .internal_input = 0, .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }}; + +ebpf_local_maps_t sync_file_range_maps[] = {{.name = "tbl_syncfr", .internal_input = NETDATA_SYNC_END, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }, + {.name = NULL, .internal_input = 0, .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = 
ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }}; struct config sync_config = { .first_section = NULL, .last_section = NULL, @@ -111,12 +179,12 @@ void ebpf_sync_disable_tracepoints(struct sync_bpf *obj, sync_syscalls_index_t i * * Set the values for maps according the value given by kernel. * - * @param obj is the main structure for bpf objects. - * @param idx the index for the main structure + * @param map the map loaded. + * @param obj the main structure for bpf objects. */ -static void ebpf_sync_set_hash_tables(struct sync_bpf *obj, sync_syscalls_index_t idx) +static void ebpf_sync_set_hash_tables(ebpf_local_maps_t *map, struct sync_bpf *obj) { - sync_maps[idx].map_fd = bpf_map__fd(obj->maps.tbl_sync); + map->map_fd = bpf_map__fd(obj->maps.tbl_sync); } /** @@ -154,6 +222,8 @@ static inline int ebpf_sync_load_and_attach(struct sync_bpf *obj, ebpf_module_t ebpf_sync_disable_tracepoints(obj, idx); } + ebpf_update_map_type(obj->maps.tbl_sync, &em->maps[NETDATA_SYNC_GLOBAL_TABLE]); + int ret = sync_bpf__load(obj); if (!ret) { if (test != EBPF_LOAD_PROBE && test != EBPF_LOAD_RETPROBE) { @@ -165,7 +235,7 @@ static inline int ebpf_sync_load_and_attach(struct sync_bpf *obj, ebpf_module_t } if (!ret) - ebpf_sync_set_hash_tables(obj, idx); + ebpf_sync_set_hash_tables(&em->maps[NETDATA_SYNC_GLOBAL_TABLE], obj); } return ret; @@ -264,11 +334,21 @@ static int ebpf_sync_load_legacy(ebpf_sync_syscalls_t *w, ebpf_module_t *em) */ static int ebpf_sync_initialize_syscall(ebpf_module_t *em) { +#ifdef LIBBPF_MAJOR_VERSION + ebpf_define_map_type(sync_maps, em->maps_per_core, running_on_kernel); + ebpf_define_map_type(syncfs_maps, em->maps_per_core, running_on_kernel); + ebpf_define_map_type(msync_maps, em->maps_per_core, running_on_kernel); + ebpf_define_map_type(fsync_maps, em->maps_per_core, running_on_kernel); + ebpf_define_map_type(fdatasync_maps, em->maps_per_core, running_on_kernel); + 
ebpf_define_map_type(sync_file_range_maps, em->maps_per_core, running_on_kernel); +#endif + int i; const char *saved_name = em->thread_name; int errors = 0; for (i = 0; local_syscalls[i].syscall; i++) { ebpf_sync_syscalls_t *w = &local_syscalls[i]; + w->sync_maps = local_syscalls[i].sync_maps; if (w->enabled) { if (em->load & EBPF_LOAD_LEGACY) { if (ebpf_sync_load_legacy(w, em)) @@ -317,17 +397,25 @@ static int ebpf_sync_initialize_syscall(ebpf_module_t *em) * Read global table * * Read the table with number of calls for all functions + * + * @param maps_per_core do I need to read all cores? */ -static void ebpf_sync_read_global_table() +static void ebpf_sync_read_global_table(int maps_per_core) { - netdata_idx_t stored; + netdata_idx_t stored[ebpf_nprocs]; uint32_t idx = NETDATA_SYNC_CALL; int i; for (i = 0; local_syscalls[i].syscall; i++) { - if (local_syscalls[i].enabled) { - int fd = sync_maps[i].map_fd; + ebpf_sync_syscalls_t *w = &local_syscalls[i]; + if (w->enabled) { + int fd = w->sync_maps[NETDATA_SYNC_GLOBAL_TABLE].map_fd; if (!bpf_map_lookup_elem(fd, &idx, &stored)) { - sync_hash_values[i] = stored; + int j, end = (maps_per_core) ? 
ebpf_nprocs : 1; + netdata_idx_t total = 0; + for (j = 0; j < end ;j++ ) + total += stored[j]; + + sync_hash_values[i] = total; } } } @@ -352,7 +440,7 @@ static void ebpf_send_sync_chart(char *id, while (move && idx <= end) { if (local_syscalls[idx].enabled) - write_chart_dimension(move->name, sync_hash_values[idx]); + write_chart_dimension(move->name, (long long)sync_hash_values[idx]); move = move->next; idx++; @@ -396,13 +484,14 @@ static void sync_collector(ebpf_module_t *em) heartbeat_init(&hb); int update_every = em->update_every; int counter = update_every - 1; + int maps_per_core = em->maps_per_core; while (!ebpf_exit_plugin) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) continue; counter = 0; - ebpf_sync_read_global_table(); + ebpf_sync_read_global_table(maps_per_core); pthread_mutex_lock(&lock); sync_send_data(); @@ -497,6 +586,22 @@ static void ebpf_sync_parse_syscalls() } } +/** + * Set sync maps + * + * When thread is initialized the variable sync_maps is set as null, + * this function fills the variable before to use. 
+ */ +static void ebpf_set_sync_maps() +{ + local_syscalls[NETDATA_SYNC_SYNC_IDX].sync_maps = sync_maps; + local_syscalls[NETDATA_SYNC_SYNCFS_IDX].sync_maps = syncfs_maps; + local_syscalls[NETDATA_SYNC_MSYNC_IDX].sync_maps = msync_maps; + local_syscalls[NETDATA_SYNC_FSYNC_IDX].sync_maps = fsync_maps; + local_syscalls[NETDATA_SYNC_FDATASYNC_IDX].sync_maps = fdatasync_maps; + local_syscalls[NETDATA_SYNC_SYNC_FILE_RANGE_IDX].sync_maps = sync_file_range_maps; +} + /** * Sync thread * @@ -513,6 +618,7 @@ void *ebpf_sync_thread(void *ptr) ebpf_module_t *em = (ebpf_module_t *)ptr; em->maps = sync_maps; + ebpf_set_sync_maps(); ebpf_sync_parse_syscalls(); #ifdef LIBBPF_MAJOR_VERSION diff --git a/collectors/ebpf.plugin/ebpf_vfs.c b/collectors/ebpf.plugin/ebpf_vfs.c index e2d87fd524b08d..bfc7ee8f71c218 100644 --- a/collectors/ebpf.plugin/ebpf_vfs.c +++ b/collectors/ebpf.plugin/ebpf_vfs.c @@ -17,15 +17,31 @@ netdata_publish_vfs_t *vfs_vector = NULL; static ebpf_local_maps_t vfs_maps[] = {{.name = "tbl_vfs_pid", .internal_input = ND_EBPF_DEFAULT_PID_SIZE, .user_input = 0, .type = NETDATA_EBPF_MAP_RESIZABLE | NETDATA_EBPF_MAP_PID, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_HASH +#endif + }, {.name = "tbl_vfs_stats", .internal_input = NETDATA_VFS_COUNTER, .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }, {.name = "vfs_ctrl", .internal_input = NETDATA_CONTROLLER_END, .user_input = 0, .type = NETDATA_EBPF_MAP_CONTROLLER, - .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, - {.name = NULL, .internal_input = 0, .user_input = 0}}; + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }, + {.name = NULL, .internal_input = 0, .user_input = 0, +#ifdef 
LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY +#endif + }}; struct config vfs_config = { .first_section = NULL, .last_section = NULL, @@ -293,17 +309,21 @@ static int ebpf_vfs_attach_probe(struct vfs_bpf *obj) } /** - * Adjust Map Size + * Adjust Size * * Resize maps according input from users. * * @param obj is the main structure for bpf objects. * @param em structure with configuration */ -static void ebpf_vfs_adjust_map_size(struct vfs_bpf *obj, ebpf_module_t *em) +static void ebpf_vfs_adjust_map(struct vfs_bpf *obj, ebpf_module_t *em) { ebpf_update_map_size(obj->maps.tbl_vfs_pid, &vfs_maps[NETDATA_VFS_PID], em, bpf_map__name(obj->maps.tbl_vfs_pid)); + + ebpf_update_map_type(obj->maps.tbl_vfs_pid, &vfs_maps[NETDATA_VFS_PID]); + ebpf_update_map_type(obj->maps.tbl_vfs_stats, &vfs_maps[NETDATA_VFS_ALL]); + ebpf_update_map_type(obj->maps.vfs_ctrl, &vfs_maps[NETDATA_VFS_CTRL]); } /** @@ -356,7 +376,7 @@ static inline int ebpf_vfs_load_and_attach(struct vfs_bpf *obj, ebpf_module_t *e ebpf_vfs_disable_trampoline(obj); } - ebpf_vfs_adjust_map_size(obj, em); + ebpf_vfs_adjust_map(obj, em); if (!em->apps_charts && !em->cgroup_charts) ebpf_vfs_disable_release_task(obj); @@ -475,23 +495,30 @@ static void ebpf_vfs_send_data(ebpf_module_t *em) /** * Read the hash table and store data to allocated vectors. + * + * @param maps_per_core do I need to read all cores? */ -static void ebpf_vfs_read_global_table() +static void ebpf_vfs_read_global_table(int maps_per_core) { uint64_t idx; netdata_idx_t res[NETDATA_VFS_COUNTER]; netdata_idx_t *val = vfs_hash_values; + size_t length = sizeof(netdata_idx_t); + if (maps_per_core) + length *= ebpf_nprocs; + int fd = vfs_maps[NETDATA_VFS_ALL].map_fd; for (idx = 0; idx < NETDATA_VFS_COUNTER; idx++) { uint64_t total = 0; if (!bpf_map_lookup_elem(fd, &idx, val)) { int i; - int end = ebpf_nprocs; + int end = (maps_per_core) ? 
ebpf_nprocs : 1; for (i = 0; i < end; i++) total += val[i]; } res[idx] = total; + memset(val, 0, length); } vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_UNLINK].ncall = res[NETDATA_KEY_CALLS_VFS_UNLINK]; @@ -723,9 +750,9 @@ void ebpf_vfs_send_apps_data(ebpf_module_t *em, struct ebpf_target *root) * * @param out the vector with read values. */ -static void vfs_apps_accumulator(netdata_publish_vfs_t *out) +static void vfs_apps_accumulator(netdata_publish_vfs_t *out, int maps_per_core) { - int i, end = (running_on_kernel >= NETDATA_KERNEL_V4_15) ? ebpf_nprocs : 1; + int i, end = (maps_per_core) ? ebpf_nprocs : 1; netdata_publish_vfs_t *total = &out[0]; for (i = 1; i < end; i++) { netdata_publish_vfs_t *w = &out[i]; @@ -771,12 +798,15 @@ static void vfs_fill_pid(uint32_t current_pid, netdata_publish_vfs_t *publish) /** * Read the hash table and store data to allocated vectors. */ -static void ebpf_vfs_read_apps() +static void ebpf_vfs_read_apps(int maps_per_core) { struct ebpf_pid_stat *pids = ebpf_root_of_pids; netdata_publish_vfs_t *vv = vfs_vector; int fd = vfs_maps[NETDATA_VFS_PID].map_fd; - size_t length = sizeof(netdata_publish_vfs_t) * ebpf_nprocs; + size_t length = sizeof(netdata_publish_vfs_t); + if (maps_per_core) + length *= ebpf_nprocs; + while (pids) { uint32_t key = pids->pid; @@ -785,7 +815,7 @@ static void ebpf_vfs_read_apps() continue; } - vfs_apps_accumulator(vv); + vfs_apps_accumulator(vv, maps_per_core); vfs_fill_pid(key, vv); @@ -799,14 +829,18 @@ static void ebpf_vfs_read_apps() /** * Update cgroup * - * Update cgroup data based in + * Update cgroup data based in PID. + * + * @param maps_per_core do I need to read all cores? 
*/ -static void read_update_vfs_cgroup() +static void read_update_vfs_cgroup(int maps_per_core) { ebpf_cgroup_target_t *ect ; netdata_publish_vfs_t *vv = vfs_vector; int fd = vfs_maps[NETDATA_VFS_PID].map_fd; - size_t length = sizeof(netdata_publish_vfs_t) * ebpf_nprocs; + size_t length = sizeof(netdata_publish_vfs_t); + if (maps_per_core) + length *= ebpf_nprocs; pthread_mutex_lock(&mutex_cgroup_shm); for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { @@ -821,7 +855,7 @@ static void read_update_vfs_cgroup() } else { memset(vv, 0, length); if (!bpf_map_lookup_elem(fd, &pid, vv)) { - vfs_apps_accumulator(vv); + vfs_apps_accumulator(vv, maps_per_core); memcpy(out, vv, sizeof(netdata_publish_vfs_t)); } @@ -1458,6 +1492,7 @@ static void vfs_collector(ebpf_module_t *em) heartbeat_init(&hb); int update_every = em->update_every; int counter = update_every - 1; + int maps_per_core = em->maps_per_core; while (!ebpf_exit_plugin) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) @@ -1465,21 +1500,21 @@ static void vfs_collector(ebpf_module_t *em) counter = 0; netdata_apps_integration_flags_t apps = em->apps_charts; - ebpf_vfs_read_global_table(); + ebpf_vfs_read_global_table(maps_per_core); pthread_mutex_lock(&collect_data_mutex); if (apps) - ebpf_vfs_read_apps(); + ebpf_vfs_read_apps(maps_per_core); + + if (cgroups) + read_update_vfs_cgroup(maps_per_core); + + pthread_mutex_lock(&lock); #ifdef NETDATA_DEV_MODE if (ebpf_aral_vfs_pid) ebpf_send_data_aral_chart(ebpf_aral_vfs_pid, em); #endif - if (cgroups) - read_update_vfs_cgroup(); - - pthread_mutex_lock(&lock); - ebpf_vfs_send_data(em); fflush(stdout); @@ -1843,6 +1878,10 @@ static void ebpf_vfs_allocate_global_vectors(int apps) */ static int ebpf_vfs_load_bpf(ebpf_module_t *em) { +#ifdef LIBBPF_MAJOR_VERSION + ebpf_define_map_type(em->maps, em->maps_per_core, running_on_kernel); +#endif + int ret = 0; ebpf_adjust_apps_cgroup(em, em->targets[NETDATA_EBPF_VFS_WRITE].mode); if 
(em->load & EBPF_LOAD_LEGACY) { diff --git a/libnetdata/ebpf/ebpf.c b/libnetdata/ebpf/ebpf.c index 61833dd73c9680..caaefc32076944 100644 --- a/libnetdata/ebpf/ebpf.c +++ b/libnetdata/ebpf/ebpf.c @@ -453,6 +453,11 @@ void ebpf_update_stats(ebpf_plugin_stats_t *report, ebpf_module_t *em) else if (em->load & EBPF_LOAD_CORE) report->core++; + if (em->maps_per_core) + report->hash_percpu++; + else + report->hash_unique++; + ebpf_stats_targets(report, em->targets); } @@ -596,15 +601,70 @@ void ebpf_update_map_size(struct bpf_map *map, ebpf_local_maps_t *lmap, ebpf_mod #endif } +#ifdef LIBBPF_MAJOR_VERSION +/** + * Update map type + * + * Update map type with information given. + * + * @param map the map we want to modify + * @param w a structure with user input + */ +void ebpf_update_map_type(struct bpf_map *map, ebpf_local_maps_t *w) +{ + if (bpf_map__set_type(map, w->map_type)) { + error("Cannot modify map type for %s", w->name); + } +} + +/** + * Define map type + * + * This PR defines the type used by hash tables according user input. + * + * @param maps the list of maps used with a hash table. + * @param maps_per_core define if map type according user specification. + * @param kver kernel version host is running. + */ +void ebpf_define_map_type(ebpf_local_maps_t *maps, int maps_per_core, int kver) +{ + if (!maps) + return; + + // Before kernel 4.06 there was not percpu hash tables + if (kver < NETDATA_EBPF_KERNEL_4_06) + maps_per_core = CONFIG_BOOLEAN_NO; + + int i = 0; + while (maps[i].name) { + ebpf_local_maps_t *map = &maps[i]; + // maps_per_core is a boolean value in configuration files. 
+ if (maps_per_core) { + if (map->map_type == BPF_MAP_TYPE_HASH) + map->map_type = BPF_MAP_TYPE_PERCPU_HASH; + else if (map->map_type == BPF_MAP_TYPE_ARRAY) + map->map_type = BPF_MAP_TYPE_PERCPU_ARRAY; + } else { + if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) + map->map_type = BPF_MAP_TYPE_HASH; + else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) + map->map_type = BPF_MAP_TYPE_ARRAY; + } + + i++; + } +} +#endif + /** - * Update Legacy map sizes + * Update Legacy map * - * Update map size for eBPF legacy code. + * Update map for eBPF legacy code. * * @param program the structure with values read from binary. * @param em the structure with information about how the module/thread is working. */ -static void ebpf_update_legacy_map_sizes(struct bpf_object *program, ebpf_module_t *em) +static void ebpf_update_legacy_map(struct bpf_object *program, ebpf_module_t *em) { struct bpf_map *map; ebpf_local_maps_t *maps = em->maps; @@ -614,13 +674,19 @@ static void ebpf_update_legacy_map_sizes(struct bpf_object *program, ebpf_module bpf_map__for_each(map, program) { const char *map_name = bpf_map__name(map); - int i = 0; ; + int i = 0; while (maps[i].name) { ebpf_local_maps_t *w = &maps[i]; - if (w->type & NETDATA_EBPF_MAP_RESIZABLE) { - if (!strcmp(w->name, map_name)) { + + if (!strcmp(w->name, map_name)) { + // Modify size + if (w->type & NETDATA_EBPF_MAP_RESIZABLE) { ebpf_update_map_size(map, w, em, map_name); } + +#ifdef LIBBPF_MAJOR_VERSION + ebpf_update_map_type(map, w); +#endif } i++; @@ -796,7 +862,7 @@ struct bpf_link **ebpf_load_program(char *plugins_dir, ebpf_module_t *em, int kv return NULL; } - ebpf_update_legacy_map_sizes(*obj, em); + ebpf_update_legacy_map(*obj, em); if (bpf_object__load(*obj)) { error("ERROR: loading BPF object file failed %s\n", lpath); @@ -1156,8 +1222,8 @@ void ebpf_update_module_using_config(ebpf_module_t *modules, netdata_ebpf_load_m { char default_value[EBPF_MAX_MODE_LENGTH + 1]; ebpf_select_mode_string(default_value, 
EBPF_MAX_MODE_LENGTH, modules->mode); - char *value = appconfig_get(modules->cfg, EBPF_GLOBAL_SECTION, EBPF_CFG_LOAD_MODE, default_value); - modules->mode = ebpf_select_mode(value); + char *load_mode = appconfig_get(modules->cfg, EBPF_GLOBAL_SECTION, EBPF_CFG_LOAD_MODE, default_value); + modules->mode = ebpf_select_mode(load_mode); modules->update_every = (int)appconfig_get_number(modules->cfg, EBPF_GLOBAL_SECTION, EBPF_CFG_UPDATE_EVERY, modules->update_every); @@ -1171,19 +1237,38 @@ void ebpf_update_module_using_config(ebpf_module_t *modules, netdata_ebpf_load_m modules->pid_map_size = (uint32_t)appconfig_get_number(modules->cfg, EBPF_GLOBAL_SECTION, EBPF_CFG_PID_SIZE, modules->pid_map_size); - value = ebpf_convert_load_mode_to_string(modules->load & NETDATA_EBPF_LOAD_METHODS); - value = appconfig_get(modules->cfg, EBPF_GLOBAL_SECTION, EBPF_CFG_TYPE_FORMAT, value); - netdata_ebpf_load_mode_t load = epbf_convert_string_to_load_mode(value); + char *value = ebpf_convert_load_mode_to_string(modules->load & NETDATA_EBPF_LOAD_METHODS); + char *type_format = appconfig_get(modules->cfg, EBPF_GLOBAL_SECTION, EBPF_CFG_TYPE_FORMAT, value); + netdata_ebpf_load_mode_t load = epbf_convert_string_to_load_mode(type_format); load = ebpf_select_load_mode(btf_file, load, kver, is_rh); modules->load = origin | load; - value = appconfig_get(modules->cfg, EBPF_GLOBAL_SECTION, EBPF_CFG_CORE_ATTACH, EBPF_CFG_ATTACH_TRAMPOLINE); - netdata_ebpf_program_loaded_t fill_lm = ebpf_convert_core_type(value, modules->mode); + char *core_attach = appconfig_get(modules->cfg, EBPF_GLOBAL_SECTION, EBPF_CFG_CORE_ATTACH, EBPF_CFG_ATTACH_TRAMPOLINE); + netdata_ebpf_program_loaded_t fill_lm = ebpf_convert_core_type(core_attach, modules->mode); ebpf_update_target_with_conf(modules, fill_lm); value = ebpf_convert_collect_pid_to_string(modules->apps_level); - value = appconfig_get(modules->cfg, EBPF_GLOBAL_SECTION, EBPF_CFG_COLLECT_PID, value); - modules->apps_level = 
ebpf_convert_string_to_apps_level(value); + char *collect_pid = appconfig_get(modules->cfg, EBPF_GLOBAL_SECTION, EBPF_CFG_COLLECT_PID, value); + modules->apps_level = ebpf_convert_string_to_apps_level(collect_pid); + + modules->maps_per_core = appconfig_get_boolean(modules->cfg, EBPF_GLOBAL_SECTION, EBPF_CFG_MAPS_PER_CORE, + modules->maps_per_core); + if (kver < NETDATA_EBPF_KERNEL_4_06) + modules->maps_per_core = CONFIG_BOOLEAN_NO; + +#ifdef NETDATA_DEV_MODE + info("The thread %s was configured with: mode = %s; update every = %d; apps = %s; cgroup = %s; ebpf type format = %s; ebpf co-re tracing = %s; collect pid = %s; maps per core = %s", + modules->thread_name, + load_mode, + modules->update_every, + (modules->apps_charts)?"enabled":"disabled", + (modules->cgroup_charts)?"enabled":"disabled", + type_format, + core_attach, + collect_pid, + (modules->maps_per_core)?"enabled":"disabled" + ); +#endif } /** diff --git a/libnetdata/ebpf/ebpf.h b/libnetdata/ebpf/ebpf.h index bf5fdc33d40387..e82aaedd479f94 100644 --- a/libnetdata/ebpf/ebpf.h +++ b/libnetdata/ebpf/ebpf.h @@ -40,6 +40,8 @@ #define EBPF_CFG_PROGRAM_PATH "btf path" +#define EBPF_CFG_MAPS_PER_CORE "maps per core" + #define EBPF_CFG_UPDATE_EVERY "update every" #define EBPF_CFG_UPDATE_APPS_EVERY_DEFAULT 10 #define EBPF_CFG_PID_SIZE "pid table size" @@ -77,6 +79,7 @@ * */ enum netdata_ebpf_kernel_versions { + NETDATA_EBPF_KERNEL_4_06 = 263680, // 263680 = 4 * 65536 + 6 * 256 NETDATA_EBPF_KERNEL_4_11 = 264960, // 264960 = 4 * 65536 + 15 * 256 NETDATA_EBPF_KERNEL_4_14 = 265728, // 264960 = 4 * 65536 + 14 * 256 NETDATA_EBPF_KERNEL_4_15 = 265984, // 265984 = 4 * 65536 + 15 * 256 @@ -196,6 +199,9 @@ typedef struct ebpf_local_maps { uint32_t user_input; uint32_t type; int map_fd; +#ifdef LIBBPF_MAJOR_VERSION + enum bpf_map_type map_type; +#endif } ebpf_local_maps_t; typedef struct ebpf_specify_name { @@ -243,6 +249,9 @@ typedef struct ebpf_plugin_stats { uint64_t memlock_kern; // The same information reported by
bpftool, but it is not accurated // https://lore.kernel.org/linux-mm/20230112155326.26902-5-laoar.shao@gmail.com/T/ uint32_t hash_tables; // Number of hash tables used on the system. + + uint32_t hash_percpu; // Number of threads running per cpu maps + uint32_t hash_unique; // Number of threads running an unique map for all cores. } ebpf_plugin_stats_t; typedef enum ebpf_stats_action { @@ -296,6 +305,7 @@ typedef struct ebpf_module { // charts char memory_usage[NETDATA_EBPF_CHART_MEM_LENGTH]; char memory_allocations[NETDATA_EBPF_CHART_MEM_LENGTH]; + int maps_per_core; } ebpf_module_t; int ebpf_get_kernel_version(); @@ -348,6 +358,7 @@ typedef struct ebpf_filesystem_partitions { ebpf_addresses_t addresses; uint64_t kernels; + ebpf_local_maps_t *fs_maps; } ebpf_filesystem_partitions_t; typedef struct ebpf_sync_syscalls { @@ -365,6 +376,7 @@ typedef struct ebpf_sync_syscalls { #else void *sync_obj; #endif + ebpf_local_maps_t *sync_maps; } ebpf_sync_syscalls_t; void ebpf_histogram_dimension_cleanup(char **ptr, size_t length); @@ -391,6 +403,8 @@ void ebpf_adjust_thread_load(ebpf_module_t *mod, struct btf *file); struct btf *ebpf_parse_btf_file(const char *filename); struct btf *ebpf_load_btf_file(char *path, char *filename); int ebpf_is_function_inside_btf(struct btf *file, char *function); +void ebpf_update_map_type(struct bpf_map *map, ebpf_local_maps_t *w); +void ebpf_define_map_type(ebpf_local_maps_t *maps, int maps_per_core, int kver); #endif void ebpf_update_kernel_memory_with_vector(ebpf_plugin_stats_t *report, ebpf_local_maps_t *maps); From 04742049a5b04ba5f4e6e7f13f8ab1ee1b34da8d Mon Sep 17 00:00:00 2001 From: thiagoftsm Date: Wed, 24 May 2023 18:29:06 +0000 Subject: [PATCH 05/23] Invert order in remote write (#15097) Co-authored-by: ilyam8 --- .../remote_write/remote_write_request.cc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/exporting/prometheus/remote_write/remote_write_request.cc 
b/exporting/prometheus/remote_write/remote_write_request.cc index ecfa11fa8aedd7..a628082d11c230 100644 --- a/exporting/prometheus/remote_write/remote_write_request.cc +++ b/exporting/prometheus/remote_write/remote_write_request.cc @@ -45,16 +45,16 @@ void add_host_info( label->set_name("__name__"); label->set_value(name); - label = timeseries->add_labels(); - label->set_name("instance"); - label->set_value(instance); - if (application) { label = timeseries->add_labels(); label->set_name("application"); label->set_value(application); } + label = timeseries->add_labels(); + label->set_name("instance"); + label->set_value(instance); + if (version) { label = timeseries->add_labels(); label->set_name("version"); @@ -118,16 +118,16 @@ void add_metric( label->set_name("chart"); label->set_value(chart); - label = timeseries->add_labels(); - label->set_name("family"); - label->set_value(family); - if (dimension) { label = timeseries->add_labels(); label->set_name("dimension"); label->set_value(dimension); } + label = timeseries->add_labels(); + label->set_name("family"); + label->set_value(family); + label = timeseries->add_labels(); label->set_name("instance"); label->set_value(instance); From e39e809ac63eb73376c5fde1c769fb8c3915a8e7 Mon Sep 17 00:00:00 2001 From: thiagoftsm Date: Wed, 24 May 2023 19:19:16 +0000 Subject: [PATCH 06/23] Add chart labels to Prometheus. (#15099) --- exporting/README.md | 2 +- exporting/WALKTHROUGH.md | 17 ++++--- exporting/prometheus/prometheus.c | 85 ++++++++++++++++++++++++++----- 3 files changed, 83 insertions(+), 21 deletions(-) diff --git a/exporting/README.md b/exporting/README.md index c6ce32b6561faf..013f86f32eff78 100644 --- a/exporting/README.md +++ b/exporting/README.md @@ -284,7 +284,7 @@ Configure individual connectors and override any global settings with the follow and names are human friendly labels (also unique). 
Most charts and metrics have the same ID and name, but in several cases they are different: disks with device-mapper, interrupts, QoS classes, statsd synthetic charts, etc. -- `send configured labels = yes | no` controls if labels defined in the `[host labels]` section in `netdata.conf` +- `send configured labels = yes | no` controls if host labels defined in the `[host labels]` section in `netdata.conf` should be sent to the external database - `send automatic labels = yes | no` controls if automatically created labels, like `_os_name` or `_architecture` diff --git a/exporting/WALKTHROUGH.md b/exporting/WALKTHROUGH.md index 49cf6587b5ded9..86be758e4493d5 100644 --- a/exporting/WALKTHROUGH.md +++ b/exporting/WALKTHROUGH.md @@ -74,10 +74,10 @@ this is your first time using Netdata I suggest you take a look around. The amou Next I want to draw your attention to a particular endpoint. Navigate to In your browser. This is the endpoint which publishes all the metrics in a format which Prometheus understands. Let's take a look at one of these metrics. -`netdata_system_cpu_percentage_average{chart="system.cpu",family="cpu",dimension="system"} 0.0831255 1501271696000` This -metric is representing several things which I will go in more details in the section on Prometheus. For now understand -that this metric: `netdata_system_cpu_percentage_average` has several labels: (`chart`, `family`, `dimension`). This -corresponds with the first cpu chart you see on the Netdata dashboard. +`netdata_disk_space_GiB_average{chart="disk_space._run",dimension="avail",family="/run",mount_point="/run",filesystem="tmpfs",mount_root="/"} 0.0298195 1684951093000` +This metric is representing several things which I will go into in more detail in the section on Prometheus. For now understand +that this metric: `netdata_disk_space_GiB_average` has several labels: (`chart`, `family`, `dimension`, `mount_point`, `filesystem`, `mount_root`).
+This corresponds with the disk space chart you see on the Netdata dashboard. ![](https://github.com/ldelossa/NetdataTutorial/raw/master/Screen%20Shot%202017-07-28%20at%204.00.45%20PM.png) @@ -138,12 +138,13 @@ As explained we have two key elements in Prometheus metrics. We have the _metric granularity between metrics. Let's use our previous example to further explain. ```conf -netdata_system_cpu_percentage_average{chart="system.cpu",family="cpu",dimension="system"} 0.0831255 1501271696000 +netdata_disk_space_GiB_average{chart="disk_space._run",dimension="avail",family="/run",mount_point="/run",filesystem="tmpfs",mount_root="/"} 0.0298195 1684951093000 ``` -Here our metric is `netdata_system_cpu_percentage_average` and our labels are `chart`, `family`, and `dimension`. The -last two values constitute the actual metric value for the metric type (gauge, counter, etc…). We can begin graphing -system metrics with this information, but first we need to hook up Prometheus to poll Netdata stats. +Here our metric is `netdata_disk_space_GiB_average` and our common labels are `chart`, `family`, and `dimension`. The +last two values constitute the actual metric value for the metric type (gauge, counter, etc…). We also have specific +labels for this chart, named `mount_point`, `filesystem`, and `mount_root`. We can begin graphing system metrics with this information, +but first we need to hook up Prometheus to poll Netdata stats. Let's move our attention to Prometheus's configuration. Prometheus gets it config from the file located (in our example) at `/opt/prometheus/prometheus.yml`.
I won't spend an extensive amount of time going over the configuration values diff --git a/exporting/prometheus/prometheus.c b/exporting/prometheus/prometheus.c index 7a1112abdd5bc3..0e0e8abf01c705 100644 --- a/exporting/prometheus/prometheus.c +++ b/exporting/prometheus/prometheus.c @@ -326,6 +326,53 @@ void format_host_labels_prometheus(struct instance *instance, RRDHOST *host) rrdlabels_walkthrough_read(host->rrdlabels, format_prometheus_label_callback, &tmp); } +/** + * Format host labels for the Prometheus exporter + * We are using a structure instead a direct buffer to expand options quickly. + * + * @param labels_buffer is the buffer used to add labels. + */ + +struct format_prometheus_chart_label_callback { + BUFFER *labels_buffer; +}; + +static int format_prometheus_chart_label_callback(const char *name, const char *value, RRDLABEL_SRC ls, void *data) { + struct format_prometheus_chart_label_callback *d = (struct format_prometheus_chart_label_callback *)data; + + (void)ls; + + if (name[0] == '_' ) + return 1; + + char k[PROMETHEUS_ELEMENT_MAX + 1]; + char v[PROMETHEUS_ELEMENT_MAX + 1]; + + prometheus_name_copy(k, name, PROMETHEUS_ELEMENT_MAX); + prometheus_label_copy(v, value, PROMETHEUS_ELEMENT_MAX); + + if (*k && *v) { + buffer_sprintf(d->labels_buffer, ",%s=\"%s\"", k, v); + } + return 1; +} + +void format_chart_labels_prometheus(struct format_prometheus_chart_label_callback *plabel, + const char *chart, + const char *family, + const char *dim, + RRDSET *st) +{ + if (likely(plabel->labels_buffer)) + buffer_reset(plabel->labels_buffer); + else { + plabel->labels_buffer = buffer_create(1024, NULL); + } + buffer_sprintf(plabel->labels_buffer, "chart=\"%s\",dimension=\"%s\",family=\"%s\"", chart, dim, family); + + rrdlabels_walkthrough_read(st->rrdlabels, format_prometheus_chart_label_callback, plabel); +} + struct host_variables_callback_options { RRDHOST *host; BUFFER *wb; @@ -462,9 +509,17 @@ static void generate_as_collected_prom_help(BUFFER *wb, struct 
gen_parameters *p * @param p parameters for generating the metric string. * @param homogeneous a flag for homogeneous charts. * @param prometheus_collector a flag for metrics from prometheus collector. + * @param chart_labels the dictionary with chart labels */ -static void generate_as_collected_prom_metric(BUFFER *wb, struct gen_parameters *p, int homogeneous, int prometheus_collector) +static void generate_as_collected_prom_metric(BUFFER *wb, + struct gen_parameters *p, + int homogeneous, + int prometheus_collector, + DICTIONARY *chart_labels) { + struct format_prometheus_chart_label_callback local_label; + local_label.labels_buffer = wb; + buffer_sprintf(wb, "%s_%s", p->prefix, p->context); if (!homogeneous) @@ -475,7 +530,11 @@ static void generate_as_collected_prom_metric(BUFFER *wb, struct gen_parameters if (homogeneous) buffer_sprintf(wb, ",dimension=\"%s\"", p->dimension); - buffer_sprintf(wb, ",family=\"%s\"%s} ", p->family, p->labels); + buffer_sprintf(wb, ",family=\"%s\"", p->family); + + rrdlabels_walkthrough_read(chart_labels, format_prometheus_chart_label_callback, &local_label); + + buffer_sprintf(wb, "%s} ", p->labels); if (prometheus_collector) buffer_sprintf( @@ -564,6 +623,10 @@ static void rrd_stats_api_v1_charts_allmetrics_prometheus( // for each chart RRDSET *st; + + static struct format_prometheus_chart_label_callback plabels = { + .labels_buffer = NULL, + }; rrdset_foreach_read(st, host) { if (likely(can_send_rrdset(instance, st, filter))) { @@ -655,7 +718,7 @@ static void rrd_stats_api_v1_charts_allmetrics_prometheus( if (unlikely(output_options & PROMETHEUS_OUTPUT_TYPES)) buffer_sprintf(wb, "# TYPE %s_%s%s %s\n", prefix, context, suffix, p.type); - generate_as_collected_prom_metric(wb, &p, homogeneous, prometheus_collector); + generate_as_collected_prom_metric(wb, &p, homogeneous, prometheus_collector, st->rrdlabels); } else { // the dimensions of the chart, do not have the same algorithm, multiplier or divisor @@ -673,7 +736,7 @@ static 
void rrd_stats_api_v1_charts_allmetrics_prometheus( buffer_sprintf( wb, "# TYPE %s_%s_%s%s %s\n", prefix, context, dimension, suffix, p.type); - generate_as_collected_prom_metric(wb, &p, homogeneous, prometheus_collector); + generate_as_collected_prom_metric(wb, &p, homogeneous, prometheus_collector, st->rrdlabels); } } else { @@ -694,6 +757,8 @@ static void rrd_stats_api_v1_charts_allmetrics_prometheus( (output_options & PROMETHEUS_OUTPUT_NAMES && rd->name) ? rrddim_name(rd) : rrddim_id(rd), PROMETHEUS_ELEMENT_MAX); + format_chart_labels_prometheus(&plabels, chart, family, dimension, st); + if (unlikely(output_options & PROMETHEUS_OUTPUT_HELP)) buffer_sprintf( wb, @@ -713,30 +778,26 @@ static void rrd_stats_api_v1_charts_allmetrics_prometheus( if (output_options & PROMETHEUS_OUTPUT_TIMESTAMPS) buffer_sprintf( wb, - "%s_%s%s%s{chart=\"%s\",dimension=\"%s\",family=\"%s\"%s} " NETDATA_DOUBLE_FORMAT + "%s_%s%s%s{%s%s} " NETDATA_DOUBLE_FORMAT " %llu\n", prefix, context, units, suffix, - chart, - dimension, - family, + buffer_tostring(plabels.labels_buffer), labels, value, last_time * MSEC_PER_SEC); else buffer_sprintf( wb, - "%s_%s%s%s{chart=\"%s\",dimension=\"%s\",family=\"%s\"%s} " NETDATA_DOUBLE_FORMAT + "%s_%s%s%s{%s%s} " NETDATA_DOUBLE_FORMAT "\n", prefix, context, units, suffix, - chart, - dimension, - family, + buffer_tostring(plabels.labels_buffer), labels, value); } From 6765d03af33800a7ecb20b07caca98691713eb00 Mon Sep 17 00:00:00 2001 From: netdatabot Date: Thu, 25 May 2023 00:17:00 +0000 Subject: [PATCH 07/23] [ci skip] Update changelog and version for nightly build: v1.39.0-62-nightly. --- CHANGELOG.md | 18 ++++++++---------- packaging/version | 2 +- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 370f172b6183a7..10444bb2598eef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,13 @@ **Merged pull requests:** +- Add chart labels to Prometheus. 
[\#15099](https://github.com/netdata/netdata/pull/15099) ([thiagoftsm](https://github.com/thiagoftsm)) +- Invert order in remote write [\#15097](https://github.com/netdata/netdata/pull/15097) ([thiagoftsm](https://github.com/thiagoftsm)) +- fix cockroachdb alarms [\#15095](https://github.com/netdata/netdata/pull/15095) ([ilyam8](https://github.com/ilyam8)) +- Address issue with Thanos Receiver [\#15094](https://github.com/netdata/netdata/pull/15094) ([thiagoftsm](https://github.com/thiagoftsm)) +- Create category overview pages for learn's restructure [\#15091](https://github.com/netdata/netdata/pull/15091) ([Ancairon](https://github.com/Ancairon)) +- Release buffer in case of error -- CID 385075 [\#15090](https://github.com/netdata/netdata/pull/15090) ([stelfrag](https://github.com/stelfrag)) +- mentioned waive off of space subscription price [\#15082](https://github.com/netdata/netdata/pull/15082) ([hugovalente-pm](https://github.com/hugovalente-pm)) - Python Dependency Migration - OracleDB Python Module [\#15074](https://github.com/netdata/netdata/pull/15074) ([EricAndrechek](https://github.com/EricAndrechek)) - Free context when establishing ACLK connection [\#15073](https://github.com/netdata/netdata/pull/15073) ([stelfrag](https://github.com/stelfrag)) - Update Security doc [\#15072](https://github.com/netdata/netdata/pull/15072) ([tkatsoulas](https://github.com/tkatsoulas)) @@ -44,6 +51,7 @@ - Try to detect bind mounts [\#14831](https://github.com/netdata/netdata/pull/14831) ([MrZammler](https://github.com/MrZammler)) - Remove old logic for handling of legacy stock config files. 
[\#14829](https://github.com/netdata/netdata/pull/14829) ([Ferroin](https://github.com/Ferroin)) - fix infiniband bytes counters multiplier and divisor [\#14748](https://github.com/netdata/netdata/pull/14748) ([ilyam8](https://github.com/ilyam8)) +- New eBPF option [\#14691](https://github.com/netdata/netdata/pull/14691) ([thiagoftsm](https://github.com/thiagoftsm)) - initial minimal h2o webserver integration [\#14585](https://github.com/netdata/netdata/pull/14585) ([underhood](https://github.com/underhood)) ## [v1.39.1](https://github.com/netdata/netdata/tree/v1.39.1) (2023-05-18) @@ -398,16 +406,6 @@ - Correct link to ansible playbook [\#14468](https://github.com/netdata/netdata/pull/14468) ([cakrit](https://github.com/cakrit)) - Moved contents of get started to installer readme [\#14467](https://github.com/netdata/netdata/pull/14467) ([cakrit](https://github.com/cakrit)) - Add markdown files in Learn [\#14466](https://github.com/netdata/netdata/pull/14466) ([Ancairon](https://github.com/Ancairon)) -- Virtual hosts for data collection [\#14464](https://github.com/netdata/netdata/pull/14464) ([ktsaou](https://github.com/ktsaou)) -- Memory management eBPF [\#14462](https://github.com/netdata/netdata/pull/14462) ([thiagoftsm](https://github.com/thiagoftsm)) -- Add contents of packaging/installer/readme.md [\#14461](https://github.com/netdata/netdata/pull/14461) ([cakrit](https://github.com/cakrit)) -- Add mention of cloud in next steps UI etc [\#14459](https://github.com/netdata/netdata/pull/14459) ([cakrit](https://github.com/cakrit)) -- Fix links and add to learn [\#14458](https://github.com/netdata/netdata/pull/14458) ([cakrit](https://github.com/cakrit)) -- Add export for people running their own registry [\#14457](https://github.com/netdata/netdata/pull/14457) ([cakrit](https://github.com/cakrit)) -- Support installing extra packages in Docker images at runtime. 
[\#14456](https://github.com/netdata/netdata/pull/14456) ([Ferroin](https://github.com/Ferroin)) -- Prevent crash when running '-W createdataset' [\#14455](https://github.com/netdata/netdata/pull/14455) ([MrZammler](https://github.com/MrZammler)) -- remove deprecated python.d collectors announced in v1.38.0 [\#14454](https://github.com/netdata/netdata/pull/14454) ([ilyam8](https://github.com/ilyam8)) -- Update static build dependencies [\#14450](https://github.com/netdata/netdata/pull/14450) ([tkatsoulas](https://github.com/tkatsoulas)) ## [v1.38.1](https://github.com/netdata/netdata/tree/v1.38.1) (2023-02-13) diff --git a/packaging/version b/packaging/version index a7d245b70e0888..80506c6e3bc3ba 100644 --- a/packaging/version +++ b/packaging/version @@ -1 +1 @@ -v1.39.0-53-nightly +v1.39.0-62-nightly From 67c1b4419f41569e716ade122c0d5e6136cea128 Mon Sep 17 00:00:00 2001 From: Costa Tsaousis Date: Thu, 25 May 2023 17:24:48 +0300 Subject: [PATCH 08/23] /api/v2/data percentage calculation on grouped queries (#15100) allow aggregation=percentage to calculate the percentage over any grouping --- web/api/queries/query.c | 53 +++++++++++++++++++++++++---------------- web/api/queries/query.h | 1 + 2 files changed, 33 insertions(+), 21 deletions(-) diff --git a/web/api/queries/query.c b/web/api/queries/query.c index 3770d47701bc46..33358af4f776dd 100644 --- a/web/api/queries/query.c +++ b/web/api/queries/query.c @@ -883,6 +883,9 @@ RRDR_GROUP_BY_FUNCTION group_by_aggregate_function_parse(const char *s) { if(strcmp(s, "sum") == 0) return RRDR_GROUP_BY_FUNCTION_SUM; + if(strcmp(s, "percentage") == 0) + return RRDR_GROUP_BY_FUNCTION_PERCENTAGE; + return RRDR_GROUP_BY_FUNCTION_AVERAGE; } @@ -900,6 +903,9 @@ const char *group_by_aggregate_function_to_string(RRDR_GROUP_BY_FUNCTION group_b case RRDR_GROUP_BY_FUNCTION_SUM: return "sum"; + + case RRDR_GROUP_BY_FUNCTION_PERCENTAGE: + return "percentage"; } } @@ -2555,9 +2561,9 @@ static void rrd2rrdr_set_timestamps(RRDR *r) { 
before_wanted, r->t[points_wanted - 1]); } -static void query_group_by_make_dimension_key(BUFFER *key, RRDR_GROUP_BY group_by, size_t group_by_id, QUERY_TARGET *qt, QUERY_NODE *qn, QUERY_CONTEXT *qc, QUERY_INSTANCE *qi, QUERY_DIMENSION *qd __maybe_unused, QUERY_METRIC *qm, bool query_has_percentage_of_instance) { +static void query_group_by_make_dimension_key(BUFFER *key, RRDR_GROUP_BY group_by, size_t group_by_id, QUERY_TARGET *qt, QUERY_NODE *qn, QUERY_CONTEXT *qc, QUERY_INSTANCE *qi, QUERY_DIMENSION *qd __maybe_unused, QUERY_METRIC *qm, bool query_has_percentage_of_group) { buffer_flush(key); - if(unlikely(!query_has_percentage_of_instance && qm->status & RRDR_DIMENSION_HIDDEN)) { + if(unlikely(!query_has_percentage_of_group && qm->status & RRDR_DIMENSION_HIDDEN)) { buffer_strcat(key, "__hidden_dimensions__"); } else if(unlikely(group_by & RRDR_GROUP_BY_SELECTED)) { @@ -2599,9 +2605,9 @@ static void query_group_by_make_dimension_key(BUFFER *key, RRDR_GROUP_BY group_b } } -static void query_group_by_make_dimension_id(BUFFER *key, RRDR_GROUP_BY group_by, size_t group_by_id, QUERY_TARGET *qt, QUERY_NODE *qn, QUERY_CONTEXT *qc, QUERY_INSTANCE *qi, QUERY_DIMENSION *qd __maybe_unused, QUERY_METRIC *qm, bool query_has_percentage_of_instance) { +static void query_group_by_make_dimension_id(BUFFER *key, RRDR_GROUP_BY group_by, size_t group_by_id, QUERY_TARGET *qt, QUERY_NODE *qn, QUERY_CONTEXT *qc, QUERY_INSTANCE *qi, QUERY_DIMENSION *qd __maybe_unused, QUERY_METRIC *qm, bool query_has_percentage_of_group) { buffer_flush(key); - if(unlikely(!query_has_percentage_of_instance && qm->status & RRDR_DIMENSION_HIDDEN)) { + if(unlikely(!query_has_percentage_of_group && qm->status & RRDR_DIMENSION_HIDDEN)) { buffer_strcat(key, "__hidden_dimensions__"); } else if(unlikely(group_by & RRDR_GROUP_BY_SELECTED)) { @@ -2654,9 +2660,9 @@ static void query_group_by_make_dimension_id(BUFFER *key, RRDR_GROUP_BY group_by } } -static void query_group_by_make_dimension_name(BUFFER *key, 
RRDR_GROUP_BY group_by, size_t group_by_id, QUERY_TARGET *qt, QUERY_NODE *qn, QUERY_CONTEXT *qc, QUERY_INSTANCE *qi, QUERY_DIMENSION *qd __maybe_unused, QUERY_METRIC *qm, bool query_has_percentage_of_instance) { +static void query_group_by_make_dimension_name(BUFFER *key, RRDR_GROUP_BY group_by, size_t group_by_id, QUERY_TARGET *qt, QUERY_NODE *qn, QUERY_CONTEXT *qc, QUERY_INSTANCE *qi, QUERY_DIMENSION *qd __maybe_unused, QUERY_METRIC *qm, bool query_has_percentage_of_group) { buffer_flush(key); - if(unlikely(!query_has_percentage_of_instance && qm->status & RRDR_DIMENSION_HIDDEN)) { + if(unlikely(!query_has_percentage_of_group && qm->status & RRDR_DIMENSION_HIDDEN)) { buffer_strcat(key, "__hidden_dimensions__"); } else if(unlikely(group_by & RRDR_GROUP_BY_SELECTED)) { @@ -2758,13 +2764,16 @@ static RRDR *rrd2rrdr_group_by_initialize(ONEWAYALLOC *owa, QUERY_TARGET *qt) { } // make sure there are valid group-by methods - bool query_has_percentage_of_instance = false; + bool query_has_percentage_of_group = false; for(size_t g = 0; g < MAX_QUERY_GROUP_BY_PASSES - 1 ;g++) { if(!(qt->request.group_by[g].group_by & SUPPORTED_GROUP_BY_METHODS)) qt->request.group_by[g].group_by = (g == 0) ? 
RRDR_GROUP_BY_DIMENSION : RRDR_GROUP_BY_NONE; if(qt->request.group_by[g].group_by & RRDR_GROUP_BY_PERCENTAGE_OF_INSTANCE) - query_has_percentage_of_instance = true; + query_has_percentage_of_group = true; + + if(qt->request.group_by[g].aggregation == RRDR_GROUP_BY_FUNCTION_PERCENTAGE) + query_has_percentage_of_group = true; } // merge all group-by options to upper levels @@ -2815,6 +2824,7 @@ static RRDR *rrd2rrdr_group_by_initialize(ONEWAYALLOC *owa, QUERY_TARGET *qt) { for(size_t g = 0; g < MAX_QUERY_GROUP_BY_PASSES ;g++) { RRDR_GROUP_BY group_by = qt->request.group_by[g].group_by; + RRDR_GROUP_BY_FUNCTION aggregation_method = qt->request.group_by[g].aggregation; if(group_by == RRDR_GROUP_BY_NONE) break; @@ -2855,7 +2865,7 @@ static RRDR *rrd2rrdr_group_by_initialize(ONEWAYALLOC *owa, QUERY_TARGET *qt) { // -------------------------------------------------------------------- // generate the group by key - query_group_by_make_dimension_key(key, group_by, g, qt, qn, qc, qi, qd, qm, query_has_percentage_of_instance); + query_group_by_make_dimension_key(key, group_by, g, qt, qn, qc, qi, qd, qm, query_has_percentage_of_group); // lookup the key in the dictionary @@ -2869,13 +2879,13 @@ static RRDR *rrd2rrdr_group_by_initialize(ONEWAYALLOC *owa, QUERY_TARGET *qt) { // ---------------------------------------------------------------- // generate the dimension id - query_group_by_make_dimension_id(key, group_by, g, qt, qn, qc, qi, qd, qm, query_has_percentage_of_instance); + query_group_by_make_dimension_id(key, group_by, g, qt, qn, qc, qi, qd, qm, query_has_percentage_of_group); entries[pos].id = string_strdupz(buffer_tostring(key)); // ---------------------------------------------------------------- // generate the dimension name - query_group_by_make_dimension_name(key, group_by, g, qt, qn, qc, qi, qd, qm, query_has_percentage_of_instance); + query_group_by_make_dimension_name(key, group_by, g, qt, qn, qc, qi, qd, qm, query_has_percentage_of_group); entries[pos].name = 
string_strdupz(buffer_tostring(key)); // add the rest of the info @@ -2914,7 +2924,7 @@ static RRDR *rrd2rrdr_group_by_initialize(ONEWAYALLOC *owa, QUERY_TARGET *qt) { // the query target adds to it the non-zero flag qm->status |= RRDR_DIMENSION_GROUPED; - if(query_has_percentage_of_instance) + if(query_has_percentage_of_group) // when the query has percentage of instance // there will be no hidden dimensions in the final query // so we have to remove the hidden flag from all dimensions @@ -2935,7 +2945,7 @@ static RRDR *rrd2rrdr_group_by_initialize(ONEWAYALLOC *owa, QUERY_TARGET *qt) { goto cleanup; } - bool hidden_dimension_on_percentage_of_instance = hidden_dimensions && (group_by & RRDR_GROUP_BY_PERCENTAGE_OF_INSTANCE); + bool hidden_dimension_on_percentage_of_group = hidden_dimensions && ((group_by & RRDR_GROUP_BY_PERCENTAGE_OF_INSTANCE) || (aggregation_method == RRDR_GROUP_BY_FUNCTION_PERCENTAGE)); // prevent double cleanup in case of error added = 0; @@ -2954,7 +2964,7 @@ static RRDR *rrd2rrdr_group_by_initialize(ONEWAYALLOC *owa, QUERY_TARGET *qt) { r->gbc = onewayalloc_callocz(owa, r->n * r->d, sizeof(*r->gbc)); r->dqp = onewayalloc_callocz(owa, r->d, sizeof(STORAGE_POINT)); - if(hidden_dimension_on_percentage_of_instance) + if(hidden_dimension_on_percentage_of_group) // this is where we are going to group the hidden dimensions r->vh = onewayalloc_mallocz(owa, r->n * r->d * sizeof(*r->vh)); @@ -3073,9 +3083,9 @@ static void rrd2rrdr_group_by_add_metric(RRDR *r_dst, size_t d_dst, RRDR *r_tmp, internal_fatal(!r_dst->dqp, "QUERY: group-by destination is not properly prepared (missing dqp array)"); internal_fatal(!r_dst->gbc, "QUERY: group-by destination is not properly prepared (missing gbc array)"); - bool hidden_dimension_on_percentage_of_instance = (r_tmp->od[d_tmp] & RRDR_DIMENSION_HIDDEN) && r_dst->vh; + bool hidden_dimension_on_percentage_of_group = (r_tmp->od[d_tmp] & RRDR_DIMENSION_HIDDEN) && r_dst->vh; - 
if(!hidden_dimension_on_percentage_of_instance) { + if(!hidden_dimension_on_percentage_of_group) { r_dst->od[d_dst] |= r_tmp->od[d_tmp]; storage_point_merge_to(r_dst->dqp[d_dst], *query_points); } @@ -3092,7 +3102,7 @@ static void rrd2rrdr_group_by_add_metric(RRDR *r_dst, size_t d_dst, RRDR *r_tmp, continue; size_t idx_dst = i * r_dst->d + d_dst; - NETDATA_DOUBLE *cn = (hidden_dimension_on_percentage_of_instance) ? &r_dst->vh[ idx_dst ] : &r_dst->v[ idx_dst ]; + NETDATA_DOUBLE *cn = (hidden_dimension_on_percentage_of_group) ? &r_dst->vh[ idx_dst ] : &r_dst->v[ idx_dst ]; RRDR_VALUE_FLAGS *co = &r_dst->o[ idx_dst ]; NETDATA_DOUBLE *ar = &r_dst->ar[ idx_dst ]; uint32_t *gbc = &r_dst->gbc[ idx_dst ]; @@ -3101,6 +3111,7 @@ static void rrd2rrdr_group_by_add_metric(RRDR *r_dst, size_t d_dst, RRDR *r_tmp, default: case RRDR_GROUP_BY_FUNCTION_AVERAGE: case RRDR_GROUP_BY_FUNCTION_SUM: + case RRDR_GROUP_BY_FUNCTION_PERCENTAGE: if(isnan(*cn)) *cn = n_tmp; else @@ -3118,7 +3129,7 @@ static void rrd2rrdr_group_by_add_metric(RRDR *r_dst, size_t d_dst, RRDR *r_tmp, break; } - if(!hidden_dimension_on_percentage_of_instance) { + if(!hidden_dimension_on_percentage_of_group) { *co &= ~RRDR_VALUE_EMPTY; *co |= (o_tmp & (RRDR_VALUE_RESET | RRDR_VALUE_PARTIAL)); *ar += ar_tmp; @@ -3161,7 +3172,7 @@ static void rrdr2rrdr_group_by_partial_trimming(RRDR *r) { } } -static void rrdr2rrdr_group_by_calculate_percentage_of_instance(RRDR *r) { +static void rrdr2rrdr_group_by_calculate_percentage_of_group(RRDR *r) { if(!r->vh) return; @@ -3291,7 +3302,7 @@ static RRDR *rrd2rrdr_group_by_finalize(RRDR *r_tmp) { // do the additional passes on RRDRs RRDR *last_r = r_tmp->group_by.r; - rrdr2rrdr_group_by_calculate_percentage_of_instance(last_r); + rrdr2rrdr_group_by_calculate_percentage_of_group(last_r); RRDR *r = last_r->group_by.r; size_t pass = 0; @@ -3302,7 +3313,7 @@ static RRDR *rrd2rrdr_group_by_finalize(RRDR *r_tmp) { qt->request.group_by[pass].aggregation, &last_r->dqp[d], pass); } - 
rrdr2rrdr_group_by_calculate_percentage_of_instance(r); + rrdr2rrdr_group_by_calculate_percentage_of_group(r); last_r = r; r = last_r->group_by.r; diff --git a/web/api/queries/query.h b/web/api/queries/query.h index e6fdcfbe4f2558..5eabb6c0397a42 100644 --- a/web/api/queries/query.h +++ b/web/api/queries/query.h @@ -85,6 +85,7 @@ typedef enum rrdr_group_by_function { RRDR_GROUP_BY_FUNCTION_MIN, RRDR_GROUP_BY_FUNCTION_MAX, RRDR_GROUP_BY_FUNCTION_SUM, + RRDR_GROUP_BY_FUNCTION_PERCENTAGE, } RRDR_GROUP_BY_FUNCTION; RRDR_GROUP_BY_FUNCTION group_by_aggregate_function_parse(const char *s); From 1467b4e86026a51342cc9f210c5a1eacc22df3f8 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Thu, 25 May 2023 18:16:06 +0100 Subject: [PATCH 09/23] update ml defaults to 24h (#15093) update defaults and docs --- ml/Config.cc | 8 ++++---- ml/README.md | 14 +++++++------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/ml/Config.cc b/ml/Config.cc index c5129c49dfdfc9..c00d2e8ee32ee2 100644 --- a/ml/Config.cc +++ b/ml/Config.cc @@ -25,10 +25,10 @@ void ml_config_load(ml_config_t *cfg) { * Read values */ - unsigned max_train_samples = config_get_number(config_section_ml, "maximum num samples to train", 4 * 3600); + unsigned max_train_samples = config_get_number(config_section_ml, "maximum num samples to train", 6 * 3600); unsigned min_train_samples = config_get_number(config_section_ml, "minimum num samples to train", 1 * 900); - unsigned train_every = config_get_number(config_section_ml, "train every", 1 * 3600); - unsigned num_models_to_use = config_get_number(config_section_ml, "number of models per dimension", 2); + unsigned train_every = config_get_number(config_section_ml, "train every", 3 * 3600); + unsigned num_models_to_use = config_get_number(config_section_ml, "number of models per dimension", 9); unsigned diff_n = config_get_number(config_section_ml, "num samples to diff", 1); unsigned smooth_n = config_get_number(config_section_ml, "num samples to smooth", 
3); @@ -86,7 +86,7 @@ void ml_config_load(ml_config_t *cfg) { error("invalid min/max train samples found (%u >= %u)", min_train_samples, max_train_samples); min_train_samples = 1 * 3600; - max_train_samples = 4 * 3600; + max_train_samples = 6 * 3600; } /* diff --git a/ml/README.md b/ml/README.md index 60f38f22e56b8a..06baf509b5f500 100644 --- a/ml/README.md +++ b/ml/README.md @@ -127,10 +127,10 @@ Below is a list of all the available configuration params and their default valu ``` [ml] # enabled = yes - # maximum num samples to train = 14400 - # minimum num samples to train = 3600 - # train every = 3600 - # number of models per dimension = 2 + # maximum num samples to train = 21600 + # minimum num samples to train = 900 + # train every = 10800 + # number of models per dimension = 9 # dbengine anomaly rate every = 30 # num samples to diff = 1 # num samples to smooth = 3 @@ -186,10 +186,10 @@ This example assumes 3 child nodes [streaming](https://github.com/netdata/netdat ### Descriptions (min/max) - `enabled`: `yes` to enable, `no` to disable. -- `maximum num samples to train`: (`3600`/`86400`) This is the maximum amount of time you would like to train each model on. For example, the default of `14400` trains on the preceding 4 hours of data, assuming an `update every` of 1 second. +- `maximum num samples to train`: (`3600`/`86400`) This is the maximum amount of time you would like to train each model on. For example, the default of `21600` trains on the preceding 6 hours of data, assuming an `update every` of 1 second. - `minimum num samples to train`: (`900`/`21600`) This is the minimum amount of data required to be able to train a model. For example, the default of `900` implies that once at least 15 minutes of data is available for training, a model is trained, otherwise it is skipped and checked again at the next training run. -- `train every`: (`1800`/`21600`) This is how often each model will be retrained. 
For example, the default of `3600` means that each model is retrained every hour. Note: The training of all models is spread out across the `train every` period for efficiency, so in reality, it means that each model will be trained in a staggered manner within each `train every` period. -- `number of models per dimension`: (`1`/`168`) This is the number of trained models that will be used for scoring. For example the default `number of models per dimension = 2` means that the two most recently trained models (covering up to the most recent `maximum num samples to train` of training data) for the dimension will be used to determine the corresponding anomaly bit. Alternatively, if you have `train every = 3600` and `number of models per dimension = 24` this means that netdata will store and use the last 24 trained models for each dimension when determining the anomaly bit, this means that for the latest feature vector in this configuration to be considered anomalous it would need to look anomalous across _all_ the models trained for that dimension in the last 24 hours. As such, increasing `number of models per dimension` may reduce some false positives since it will result in more models (covering a wider time frame of training) being used during scoring. +- `train every`: (`1800`/`21600`) This is how often each model will be retrained. For example, the default of `10800` means that each model is retrained every 3 hours. Note: The training of all models is spread out across the `train every` period for efficiency, so in reality, it means that each model will be trained in a staggered manner within each `train every` period. +- `number of models per dimension`: (`1`/`168`) This is the number of trained models that will be used for scoring. For example the default `number of models per dimension = 9` means that just the most recently trained 9 models for the dimension will be used to determine the corresponding anomaly bit. 
This means that under default settings of `maximum num samples to train = 21600`, `train every = 10800` and `number of models per dimension = 9`, netdata will store and use the last 9 trained models for each dimension when determining the anomaly bit. This means that for the latest feature vector in this configuration to be considered anomalous it would need to look anomalous across _all_ the models trained for that dimension in the last 9*(10800/3600) ~= 27 hours. As such, increasing `number of models per dimension` may reduce some false positives since it will result in more models (covering a wider time frame of training) being used during scoring. - `dbengine anomaly rate every`: (`30`/`900`) This is how often netdata will aggregate all the anomaly bits into a single chart (`anomaly_detection.anomaly_rates`). The aggregation into a single chart allows enabling anomaly rate ranking over _all_ metrics with one API call as opposed to a call per chart. - `num samples to diff`: (`0`/`1`) This is a `0` or `1` to determine if you want the model to operate on differences of the raw data or just the raw data. For example, the default of `1` means that we take differences of the raw values. Using differences is more general and works on dimensions that might naturally tend to have some trends or cycles in them that is normal behavior to which we don't want to be too sensitive. - `num samples to smooth`: (`0`/`5`) This is a small integer that controls the amount of smoothing applied as part of the feature processing used by the model. For example, the default of `3` means that the rolling average of the last 3 values is used. Smoothing like this helps the model be a little more robust to spiky types of dimensions that naturally "jump" up or down as part of their normal behavior. 
From 6cf43a761815f506b71127f159afe6f3eb4bb782 Mon Sep 17 00:00:00 2001 From: netdatabot Date: Fri, 26 May 2023 00:17:29 +0000 Subject: [PATCH 10/23] [ci skip] Update changelog and version for nightly build: v1.39.0-65-nightly. --- CHANGELOG.md | 5 ++--- packaging/version | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 10444bb2598eef..0ae952bb77b046 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,10 +6,12 @@ **Merged pull requests:** +- /api/v2/data percentage calculation on grouped queries [\#15100](https://github.com/netdata/netdata/pull/15100) ([ktsaou](https://github.com/ktsaou)) - Add chart labels to Prometheus. [\#15099](https://github.com/netdata/netdata/pull/15099) ([thiagoftsm](https://github.com/thiagoftsm)) - Invert order in remote write [\#15097](https://github.com/netdata/netdata/pull/15097) ([thiagoftsm](https://github.com/thiagoftsm)) - fix cockroachdb alarms [\#15095](https://github.com/netdata/netdata/pull/15095) ([ilyam8](https://github.com/ilyam8)) - Address issue with Thanos Receiver [\#15094](https://github.com/netdata/netdata/pull/15094) ([thiagoftsm](https://github.com/thiagoftsm)) +- update ml defaults to 24h [\#15093](https://github.com/netdata/netdata/pull/15093) ([andrewm4894](https://github.com/andrewm4894)) - Create category overview pages for learn's restructure [\#15091](https://github.com/netdata/netdata/pull/15091) ([Ancairon](https://github.com/Ancairon)) - Release buffer in case of error -- CID 385075 [\#15090](https://github.com/netdata/netdata/pull/15090) ([stelfrag](https://github.com/stelfrag)) - mentioned waive off of space subscription price [\#15082](https://github.com/netdata/netdata/pull/15082) ([hugovalente-pm](https://github.com/hugovalente-pm)) @@ -403,9 +405,6 @@ - Add sbindir\_POST template for v235 service file [\#14471](https://github.com/netdata/netdata/pull/14471) ([MrZammler](https://github.com/MrZammler)) - Fix random crash on agent shutdown 
[\#14470](https://github.com/netdata/netdata/pull/14470) ([stelfrag](https://github.com/stelfrag)) - Move ansible md [\#14469](https://github.com/netdata/netdata/pull/14469) ([cakrit](https://github.com/cakrit)) -- Correct link to ansible playbook [\#14468](https://github.com/netdata/netdata/pull/14468) ([cakrit](https://github.com/cakrit)) -- Moved contents of get started to installer readme [\#14467](https://github.com/netdata/netdata/pull/14467) ([cakrit](https://github.com/cakrit)) -- Add markdown files in Learn [\#14466](https://github.com/netdata/netdata/pull/14466) ([Ancairon](https://github.com/Ancairon)) ## [v1.38.1](https://github.com/netdata/netdata/tree/v1.38.1) (2023-02-13) diff --git a/packaging/version b/packaging/version index 80506c6e3bc3ba..d12d932ceac4de 100644 --- a/packaging/version +++ b/packaging/version @@ -1 +1 @@ -v1.39.0-62-nightly +v1.39.0-65-nightly From 8bf58525b178987d600f27c81076719ce1d252b6 Mon Sep 17 00:00:00 2001 From: Costa Tsaousis Date: Fri, 26 May 2023 20:33:53 +0300 Subject: [PATCH 11/23] fix the units when returning percentage of a group (#15105) --- database/contexts/query_target.c | 2 +- database/contexts/rrdcontext.h | 14 +++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/database/contexts/query_target.c b/database/contexts/query_target.c index 69386a3f8b6c89..7759f85e8d056e 100644 --- a/database/contexts/query_target.c +++ b/database/contexts/query_target.c @@ -1049,7 +1049,7 @@ QUERY_TARGET *query_target_create(QUERY_TARGET_REQUEST *qtr) { qt->window.before = qt->request.before; qt->window.options = qt->request.options; - if(query_target_has_percentage_of_instance(qt)) + if(query_target_has_percentage_of_group(qt)) qt->window.options &= ~RRDR_OPTION_PERCENTAGE; rrdr_relative_window_to_absolute(&qt->window.after, &qt->window.before, &qt->window.now); diff --git a/database/contexts/rrdcontext.h b/database/contexts/rrdcontext.h index 5328483d6ee56e..f51cff7ec1a1cf 100644 --- 
a/database/contexts/rrdcontext.h +++ b/database/contexts/rrdcontext.h @@ -524,11 +524,15 @@ bool rrdcontext_retention_match(RRDCONTEXT_ACQUIRED *rca, time_t after, time_t b #define query_target_aggregatable(qt) ((qt)->window.options & RRDR_OPTION_RETURN_RAW) -static inline bool query_target_has_percentage_of_instance(QUERY_TARGET *qt) { - for(size_t g = 0; g < MAX_QUERY_GROUP_BY_PASSES ;g++) - if(qt->request.group_by[g].group_by & RRDR_GROUP_BY_PERCENTAGE_OF_INSTANCE) +static inline bool query_target_has_percentage_of_group(QUERY_TARGET *qt) { + for(size_t g = 0; g < MAX_QUERY_GROUP_BY_PASSES ;g++) { + if (qt->request.group_by[g].group_by & RRDR_GROUP_BY_PERCENTAGE_OF_INSTANCE) return true; + if (qt->request.group_by[g].aggregation == RRDR_GROUP_BY_FUNCTION_PERCENTAGE) + return true; + } + return false; } @@ -536,7 +540,7 @@ static inline bool query_target_needs_all_dimensions(QUERY_TARGET *qt) { if(qt->request.options & RRDR_OPTION_PERCENTAGE) return true; - return query_target_has_percentage_of_instance(qt); + return query_target_has_percentage_of_group(qt); } static inline bool query_target_has_percentage_units(QUERY_TARGET *qt) { @@ -546,7 +550,7 @@ static inline bool query_target_has_percentage_units(QUERY_TARGET *qt) { if((qt->request.options & RRDR_OPTION_PERCENTAGE) && !(qt->window.options & RRDR_OPTION_RETURN_RAW)) return true; - return query_target_has_percentage_of_instance(qt); + return query_target_has_percentage_of_group(qt); } #endif // NETDATA_RRDCONTEXT_H From 17a729f82ca8aa5151309971e7aafcc1ad6fff9b Mon Sep 17 00:00:00 2001 From: netdatabot Date: Sat, 27 May 2023 00:17:14 +0000 Subject: [PATCH 12/23] [ci skip] Update changelog and version for nightly build: v1.39.0-67-nightly. 
--- CHANGELOG.md | 2 +- packaging/version | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ae952bb77b046..2d4e4635d0af67 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ **Merged pull requests:** +- fix the units when returning percentage of a group [\#15105](https://github.com/netdata/netdata/pull/15105) ([ktsaou](https://github.com/ktsaou)) - /api/v2/data percentage calculation on grouped queries [\#15100](https://github.com/netdata/netdata/pull/15100) ([ktsaou](https://github.com/ktsaou)) - Add chart labels to Prometheus. [\#15099](https://github.com/netdata/netdata/pull/15099) ([thiagoftsm](https://github.com/thiagoftsm)) - Invert order in remote write [\#15097](https://github.com/netdata/netdata/pull/15097) ([thiagoftsm](https://github.com/thiagoftsm)) @@ -404,7 +405,6 @@ - fix a possible bug with an image in the md file [\#14472](https://github.com/netdata/netdata/pull/14472) ([Ancairon](https://github.com/Ancairon)) - Add sbindir\_POST template for v235 service file [\#14471](https://github.com/netdata/netdata/pull/14471) ([MrZammler](https://github.com/MrZammler)) - Fix random crash on agent shutdown [\#14470](https://github.com/netdata/netdata/pull/14470) ([stelfrag](https://github.com/stelfrag)) -- Move ansible md [\#14469](https://github.com/netdata/netdata/pull/14469) ([cakrit](https://github.com/cakrit)) ## [v1.38.1](https://github.com/netdata/netdata/tree/v1.38.1) (2023-02-13) diff --git a/packaging/version b/packaging/version index d12d932ceac4de..73133949ad7072 100644 --- a/packaging/version +++ b/packaging/version @@ -1 +1 @@ -v1.39.0-65-nightly +v1.39.0-67-nightly From 6dbbabee8825819ec588b9d389fac1c5e13d7099 Mon Sep 17 00:00:00 2001 From: Costa Tsaousis Date: Sat, 27 May 2023 06:25:55 +0300 Subject: [PATCH 13/23] percentage-of-group: fix uninitialized array vh (#15106) fix uninitialized array vh --- web/api/queries/query.c | 29 ++++++++++++----------------- 1 file changed, 12 
insertions(+), 17 deletions(-) diff --git a/web/api/queries/query.c b/web/api/queries/query.c index 33358af4f776dd..fe028262222ccd 100644 --- a/web/api/queries/query.c +++ b/web/api/queries/query.c @@ -2764,19 +2764,16 @@ static RRDR *rrd2rrdr_group_by_initialize(ONEWAYALLOC *owa, QUERY_TARGET *qt) { } // make sure there are valid group-by methods - bool query_has_percentage_of_group = false; - for(size_t g = 0; g < MAX_QUERY_GROUP_BY_PASSES - 1 ;g++) { + for(size_t g = 0; g < MAX_QUERY_GROUP_BY_PASSES ;g++) { if(!(qt->request.group_by[g].group_by & SUPPORTED_GROUP_BY_METHODS)) qt->request.group_by[g].group_by = (g == 0) ? RRDR_GROUP_BY_DIMENSION : RRDR_GROUP_BY_NONE; - - if(qt->request.group_by[g].group_by & RRDR_GROUP_BY_PERCENTAGE_OF_INSTANCE) - query_has_percentage_of_group = true; - - if(qt->request.group_by[g].aggregation == RRDR_GROUP_BY_FUNCTION_PERCENTAGE) - query_has_percentage_of_group = true; } - // merge all group-by options to upper levels + bool query_has_percentage_of_group = query_target_has_percentage_of_group(qt); + + // merge all group-by options to upper levels, + // so that the top level has all the groupings of the inner levels, + // and each subsequent level has all the groupings of its inner levels. 
for(size_t g = 0; g < MAX_QUERY_GROUP_BY_PASSES - 1 ;g++) { if(qt->request.group_by[g].group_by == RRDR_GROUP_BY_NONE) continue; @@ -2925,8 +2922,8 @@ static RRDR *rrd2rrdr_group_by_initialize(ONEWAYALLOC *owa, QUERY_TARGET *qt) { qm->status |= RRDR_DIMENSION_GROUPED; if(query_has_percentage_of_group) - // when the query has percentage of instance - // there will be no hidden dimensions in the final query + // when the query has percentage of group + // there will be no hidden dimensions in the final query, // so we have to remove the hidden flag from all dimensions entries[pos].od |= qm->status & ~RRDR_DIMENSION_HIDDEN; else @@ -2944,12 +2941,10 @@ static RRDR *rrd2rrdr_group_by_initialize(ONEWAYALLOC *owa, QUERY_TARGET *qt) { qt->id, qt->window.after, qt->window.before, added, qt->window.points); goto cleanup; } - - bool hidden_dimension_on_percentage_of_group = hidden_dimensions && ((group_by & RRDR_GROUP_BY_PERCENTAGE_OF_INSTANCE) || (aggregation_method == RRDR_GROUP_BY_FUNCTION_PERCENTAGE)); - - // prevent double cleanup in case of error + // prevent double free at cleanup in case of error added = 0; + // link this RRDR if(!last_r) first_r = last_r = r; else @@ -2964,7 +2959,7 @@ static RRDR *rrd2rrdr_group_by_initialize(ONEWAYALLOC *owa, QUERY_TARGET *qt) { r->gbc = onewayalloc_callocz(owa, r->n * r->d, sizeof(*r->gbc)); r->dqp = onewayalloc_callocz(owa, r->d, sizeof(STORAGE_POINT)); - if(hidden_dimension_on_percentage_of_group) + if(hidden_dimensions && ((group_by & RRDR_GROUP_BY_PERCENTAGE_OF_INSTANCE) || (aggregation_method == RRDR_GROUP_BY_FUNCTION_PERCENTAGE))) // this is where we are going to group the hidden dimensions r->vh = onewayalloc_mallocz(owa, r->n * r->d * sizeof(*r->vh)); @@ -3016,7 +3011,7 @@ static RRDR *rrd2rrdr_group_by_initialize(ONEWAYALLOC *owa, QUERY_TARGET *qt) { co[d] = RRDR_VALUE_EMPTY; if(vh) - *vh = NAN; + vh[d] = NAN; } } } From f0211f9f310f51c437842619655cf58996c7eaf7 Mon Sep 17 00:00:00 2001 From: netdatabot Date: Sun, 28 May 
2023 00:17:51 +0000 Subject: [PATCH 14/23] [ci skip] Update changelog and version for nightly build: v1.39.0-69-nightly. --- CHANGELOG.md | 2 +- packaging/version | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2d4e4635d0af67..f9c02a76e66c27 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ **Merged pull requests:** +- percentage-of-group: fix uninitialized array vh [\#15106](https://github.com/netdata/netdata/pull/15106) ([ktsaou](https://github.com/ktsaou)) - fix the units when returning percentage of a group [\#15105](https://github.com/netdata/netdata/pull/15105) ([ktsaou](https://github.com/ktsaou)) - /api/v2/data percentage calculation on grouped queries [\#15100](https://github.com/netdata/netdata/pull/15100) ([ktsaou](https://github.com/ktsaou)) - Add chart labels to Prometheus. [\#15099](https://github.com/netdata/netdata/pull/15099) ([thiagoftsm](https://github.com/thiagoftsm)) @@ -404,7 +405,6 @@ - Add a file to Learn [\#14473](https://github.com/netdata/netdata/pull/14473) ([Ancairon](https://github.com/Ancairon)) - fix a possible bug with an image in the md file [\#14472](https://github.com/netdata/netdata/pull/14472) ([Ancairon](https://github.com/Ancairon)) - Add sbindir\_POST template for v235 service file [\#14471](https://github.com/netdata/netdata/pull/14471) ([MrZammler](https://github.com/MrZammler)) -- Fix random crash on agent shutdown [\#14470](https://github.com/netdata/netdata/pull/14470) ([stelfrag](https://github.com/stelfrag)) ## [v1.38.1](https://github.com/netdata/netdata/tree/v1.38.1) (2023-02-13) diff --git a/packaging/version b/packaging/version index 73133949ad7072..34aabdba7617b3 100644 --- a/packaging/version +++ b/packaging/version @@ -1 +1 @@ -v1.39.0-67-nightly +v1.39.0-69-nightly From 430c0ec56a7cf0d8672e5a5ee9a73455f030b1ad Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Mon, 29 May 2023 08:54:40 +0100 Subject: [PATCH 15/23] update agent telemetry url to 
be cloud function instead of posthog (#15085) * update agent events telemetry url to be cloud function instead of posthog. --- daemon/anonymous-statistics.sh.in | 7 +++---- docs/anonymous-statistics.md | 6 +++--- packaging/installer/kickstart.sh | 4 +--- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/daemon/anonymous-statistics.sh.in b/daemon/anonymous-statistics.sh.in index 8676ffbe7fc9b0..32cbc71dbdc59a 100755 --- a/daemon/anonymous-statistics.sh.in +++ b/daemon/anonymous-statistics.sh.in @@ -74,7 +74,6 @@ NETDATA_PREBUILT_DISTRO="${42}" # define body of request to be sent REQ_BODY="$(cat << EOF { - "api_key": "mqkwGT0JNFqO-zX2t0mW6Tec9yooaVu7xCBlXtHnt5Y", "event": "${ACTION} ${ACTION_RESULT}", "properties": { "distinct_id": "${NETDATA_REGISTRY_UNIQUE_ID}", @@ -164,9 +163,9 @@ REQ_BODY="$(cat << EOF EOF )" -# send the anonymous statistics to the Netdata PostHog +# send the anonymous statistics to Netdata if [ -n "$(command -v curl 2> /dev/null)" ]; then - curl --silent -o /dev/null --write-out '%{http_code}' -X POST --max-time 2 --header "Content-Type: application/json" -d "${REQ_BODY}" https://app.posthog.com/capture/ + curl --silent -o /dev/null --write-out '%{http_code}' -X POST --max-time 2 --header "Content-Type: application/json" -d "${REQ_BODY}" https://us-east1-netdata-analytics-bi.cloudfunctions.net/ingest_agent_events else wget -q -O - --no-check-certificate \ --server-response \ @@ -174,5 +173,5 @@ else --timeout=1 \ --header 'Content-Type: application/json' \ --body-data "${REQ_BODY}" \ - 'https://app.posthog.com/capture/' 2>&1 | awk '/^ HTTP/{print $2}' + 'https://us-east1-netdata-analytics-bi.cloudfunctions.net/ingest_agent_events' 2>&1 | awk '/^ HTTP/{print $2}' fi diff --git a/docs/anonymous-statistics.md b/docs/anonymous-statistics.md index 512cd02d33be94..d8cc99689ad3fd 100644 --- a/docs/anonymous-statistics.md +++ b/docs/anonymous-statistics.md @@ -8,8 +8,8 @@ learn_rel_path: "Configuration" # Anonymous telemetry events -By 
default, Netdata collects anonymous usage information from the open-source monitoring agent using the open-source -product analytics platform [PostHog](https://github.com/PostHog/posthog). We use their [cloud enterprise platform](https://posthog.com/product). +By default, Netdata collects anonymous usage information from the open-source monitoring agent. For agent events like start,stop,crash etc we use our own cloud function in GCP. For frontend telemetry (pageviews etc.) on the agent dashboard itself we use the open-source +product analytics platform [PostHog](https://github.com/PostHog/posthog). We are strongly committed to your [data privacy](https://netdata.cloud/privacy/). @@ -52,7 +52,7 @@ variable is controlled via the [opt-out mechanism](#opt-out). ## Agent Backend - Anonymous Statistics Script Every time the daemon is started or stopped and every time a fatal condition is encountered, Netdata uses the anonymous -statistics script to collect system information and send it to the Netdata PostHog via an http call. The information collected for all +statistics script to collect system information and send it to the Netdata telemetry cloud function via an http call. 
The information collected for all events is: - Netdata version diff --git a/packaging/installer/kickstart.sh b/packaging/installer/kickstart.sh index 3c6c035f91908a..ea3c6a58d8eb58 100755 --- a/packaging/installer/kickstart.sh +++ b/packaging/installer/kickstart.sh @@ -34,7 +34,7 @@ REPOCONFIG_RPM_URL_PREFIX="https://repo.netdata.cloud/repos/repoconfig" REPOCONFIG_RPM_VERSION="2-1" START_TIME="$(date +%s)" STATIC_INSTALL_ARCHES="x86_64 armv7l aarch64 ppc64le" -TELEMETRY_URL="https://app.posthog.com/capture/" +TELEMETRY_URL="https://us-east1-netdata-analytics-bi.cloudfunctions.net/ingest_agent_events" # ====================================================================== # Defaults for environment variables @@ -63,7 +63,6 @@ else fi NETDATA_TARBALL_BASEURL="${NETDATA_TARBALL_BASEURL:-https://github.com/netdata/netdata-nightlies/releases}" -TELEMETRY_API_KEY="${NETDATA_POSTHOG_API_KEY:-mqkwGT0JNFqO-zX2t0mW6Tec9yooaVu7xCBlXtHnt5Y}" if echo "${0}" | grep -q 'kickstart-static64'; then NETDATA_FORCE_METHOD='static' @@ -267,7 +266,6 @@ telemetry_event() { REQ_BODY="$(cat << EOF { - "api_key": "${TELEMETRY_API_KEY}", "event": "${1}", "properties": { "distinct_id": "${DISTINCT_ID}", From 1aed6efd3c603cf247dc56688d3543b6b650b490 Mon Sep 17 00:00:00 2001 From: Ilya Mashchenko Date: Mon, 29 May 2023 13:00:47 +0300 Subject: [PATCH 16/23] oracledb: make conn protocol configurable (#15104) --- collectors/python.d.plugin/oracledb/oracledb.chart.py | 9 ++++++--- collectors/python.d.plugin/oracledb/oracledb.conf | 4 ++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/collectors/python.d.plugin/oracledb/oracledb.chart.py b/collectors/python.d.plugin/oracledb/oracledb.chart.py index 6bb2b285496f58..455cf270ee7080 100644 --- a/collectors/python.d.plugin/oracledb/oracledb.chart.py +++ b/collectors/python.d.plugin/oracledb/oracledb.chart.py @@ -16,6 +16,7 @@ HAS_ORACLE_NEW = False try: import cx_Oracle + HAS_ORACLE_OLD = True except ImportError: HAS_ORACLE_OLD = False 
@@ -328,6 +329,7 @@ def __init__(self, configuration=None, name=None): self.password = configuration.get('password') self.server = configuration.get('server') self.service = configuration.get('service') + self.protocol = configuration.get('protocol', 'tcps') self.alive = False self.conn = None self.active_tablespaces = set() @@ -338,7 +340,8 @@ def connect(self): self.conn = None if HAS_ORACLE_NEW: try: - self.conn = cx_Oracle.connect(f'{self.user}/{self.password}@tcps://{self.server}/{self.service}') + self.conn = cx_Oracle.connect( + f'{self.user}/{self.password}@{self.protocol}://{self.server}/{self.service}') except cx_Oracle.DatabaseError as error: self.error(error) return False @@ -824,7 +827,7 @@ def add_tablespace_to_charts(self, name): 'absolute', 1, 1000, - ]) + ]) self.charts['allocated_usage'].add_dimension( [ '{0}_allocated_used'.format(name), @@ -832,7 +835,7 @@ def add_tablespace_to_charts(self, name): 'absolute', 1, 1000, - ]) + ]) self.charts['allocated_usage_in_percent'].add_dimension( [ '{0}_allocated_used_in_percent'.format(name), diff --git a/collectors/python.d.plugin/oracledb/oracledb.conf b/collectors/python.d.plugin/oracledb/oracledb.conf index 6d7bb480f340c8..027215dad5002e 100644 --- a/collectors/python.d.plugin/oracledb/oracledb.conf +++ b/collectors/python.d.plugin/oracledb/oracledb.conf @@ -66,6 +66,8 @@ # server: localhost:1521 # the IP address or hostname (and port) of the Oracle Database Server. Required. # service: XE # the Oracle Database service name. Required. To view the services available on your server, # run this query: `select SERVICE_NAME from gv$session where sid in (select sid from V$MYSTAT)`. 
+# protocol: tcp/tcps # one of the strings "tcp" or "tcps" indicating whether to use unencrypted network traffic +# or encrypted network traffic # # ---------------------------------------------------------------------- # AUTO-DETECTION JOBS @@ -76,9 +78,11 @@ # password: 'secret' # server: 'localhost:1521' # service: 'XE' +# protocol: 'tcps' #remote: # user: 'netdata' # password: 'secret' # server: '10.0.0.1:1521' # service: 'XE' +# protocol: 'tcps' From bb835fe8eee4ed1b0f14b85a50578cadbc52dd53 Mon Sep 17 00:00:00 2001 From: Emmanuel Vasilakis Date: Mon, 29 May 2023 23:11:32 +0300 Subject: [PATCH 17/23] Only queue an alert to the cloud when it's inserted (#15110) only queue an alert to cloud when its inserted --- database/sqlite/sqlite_aclk_alert.c | 2 +- database/sqlite/sqlite_health.c | 8 +++++++- health/health_log.c | 7 ------- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/database/sqlite/sqlite_aclk_alert.c b/database/sqlite/sqlite_aclk_alert.c index f22e3eb03bba50..64b0efc8a9df0f 100644 --- a/database/sqlite/sqlite_aclk_alert.c +++ b/database/sqlite/sqlite_aclk_alert.c @@ -1046,7 +1046,7 @@ void aclk_push_alarm_checkpoint(RRDHOST *host __maybe_unused) if (rrdhost_flag_check(host, RRDHOST_FLAG_ACLK_STREAM_ALERTS)) { //postpone checkpoint send - wc->alert_checkpoint_req++; + wc->alert_checkpoint_req+=3; log_access("ACLK REQ [%s (N/A)]: ALERTS CHECKPOINT POSTPONED", rrdhost_hostname(host)); return; } diff --git a/database/sqlite/sqlite_health.c b/database/sqlite/sqlite_health.c index 0e0991a466f505..d5f3dfaefd5d4a 100644 --- a/database/sqlite/sqlite_health.c +++ b/database/sqlite/sqlite_health.c @@ -354,8 +354,14 @@ void sql_health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae) { if (ae->flags & HEALTH_ENTRY_FLAG_SAVED) sql_health_alarm_log_update(host, ae); - else + else { sql_health_alarm_log_insert(host, ae); +#ifdef ENABLE_ACLK + if (netdata_cloud_setting) { + sql_queue_alarm_to_aclk(host, ae, 0); + } +#endif + } } /* Health related SQL 
queries diff --git a/health/health_log.c b/health/health_log.c index b1f59a1a54572d..a359b92e32fef6 100644 --- a/health/health_log.c +++ b/health/health_log.c @@ -5,14 +5,7 @@ // ---------------------------------------------------------------------------- inline void health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae) { - sql_health_alarm_log_save(host, ae); - -#ifdef ENABLE_ACLK - if (netdata_cloud_setting) { - sql_queue_alarm_to_aclk(host, ae, 0); - } -#endif } // ---------------------------------------------------------------------------- From ae97383970f968907fd60c24001ec5da33254a7c Mon Sep 17 00:00:00 2001 From: netdatabot Date: Tue, 30 May 2023 00:16:56 +0000 Subject: [PATCH 18/23] [ci skip] Update changelog and version for nightly build: v1.39.0-73-nightly. --- CHANGELOG.md | 6 +++--- packaging/version | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f9c02a76e66c27..0a7c5d304ad45b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,8 +6,10 @@ **Merged pull requests:** +- Only queue an alert to the cloud when it's inserted [\#15110](https://github.com/netdata/netdata/pull/15110) ([MrZammler](https://github.com/MrZammler)) - percentage-of-group: fix uninitialized array vh [\#15106](https://github.com/netdata/netdata/pull/15106) ([ktsaou](https://github.com/ktsaou)) - fix the units when returning percentage of a group [\#15105](https://github.com/netdata/netdata/pull/15105) ([ktsaou](https://github.com/ktsaou)) +- oracledb: make conn protocol configurable [\#15104](https://github.com/netdata/netdata/pull/15104) ([ilyam8](https://github.com/ilyam8)) - /api/v2/data percentage calculation on grouped queries [\#15100](https://github.com/netdata/netdata/pull/15100) ([ktsaou](https://github.com/ktsaou)) - Add chart labels to Prometheus. 
[\#15099](https://github.com/netdata/netdata/pull/15099) ([thiagoftsm](https://github.com/thiagoftsm)) - Invert order in remote write [\#15097](https://github.com/netdata/netdata/pull/15097) ([thiagoftsm](https://github.com/thiagoftsm)) @@ -16,6 +18,7 @@ - update ml defaults to 24h [\#15093](https://github.com/netdata/netdata/pull/15093) ([andrewm4894](https://github.com/andrewm4894)) - Create category overview pages for learn's restructure [\#15091](https://github.com/netdata/netdata/pull/15091) ([Ancairon](https://github.com/Ancairon)) - Release buffer in case of error -- CID 385075 [\#15090](https://github.com/netdata/netdata/pull/15090) ([stelfrag](https://github.com/stelfrag)) +- update agent telemetry url to be cloud function instead of posthog [\#15085](https://github.com/netdata/netdata/pull/15085) ([andrewm4894](https://github.com/andrewm4894)) - mentioned waive off of space subscription price [\#15082](https://github.com/netdata/netdata/pull/15082) ([hugovalente-pm](https://github.com/hugovalente-pm)) - Python Dependency Migration - OracleDB Python Module [\#15074](https://github.com/netdata/netdata/pull/15074) ([EricAndrechek](https://github.com/EricAndrechek)) - Free context when establishing ACLK connection [\#15073](https://github.com/netdata/netdata/pull/15073) ([stelfrag](https://github.com/stelfrag)) @@ -402,9 +405,6 @@ - Remove obsolete or redundant docs [\#14476](https://github.com/netdata/netdata/pull/14476) ([cakrit](https://github.com/cakrit)) - Incorporate interoperability and fix edit link [\#14475](https://github.com/netdata/netdata/pull/14475) ([cakrit](https://github.com/cakrit)) - Upgrade demo sites to the getting started section [\#14474](https://github.com/netdata/netdata/pull/14474) ([cakrit](https://github.com/cakrit)) -- Add a file to Learn [\#14473](https://github.com/netdata/netdata/pull/14473) ([Ancairon](https://github.com/Ancairon)) -- fix a possible bug with an image in the md file 
[\#14472](https://github.com/netdata/netdata/pull/14472) ([Ancairon](https://github.com/Ancairon)) -- Add sbindir\_POST template for v235 service file [\#14471](https://github.com/netdata/netdata/pull/14471) ([MrZammler](https://github.com/MrZammler)) ## [v1.38.1](https://github.com/netdata/netdata/tree/v1.38.1) (2023-02-13) diff --git a/packaging/version b/packaging/version index 34aabdba7617b3..aacec50d115df9 100644 --- a/packaging/version +++ b/packaging/version @@ -1 +1 @@ -v1.39.0-69-nightly +v1.39.0-73-nightly From 44b6c223b3e13774df45a96dd48588aa8a66ba42 Mon Sep 17 00:00:00 2001 From: Costa Tsaousis Date: Tue, 30 May 2023 11:13:59 +0300 Subject: [PATCH 19/23] percentage of group is now aggregatable at cloud across multiple nodes (#15109) --- web/api/formatters/json/json.c | 17 +++++++++++++---- web/api/formatters/rrd2json.h | 2 +- web/api/netdata-swagger.yaml | 11 ++++++++++- web/api/queries/query.c | 17 +++++++++-------- 4 files changed, 33 insertions(+), 14 deletions(-) diff --git a/web/api/formatters/json/json.c b/web/api/formatters/json/json.c index d5b8c7570e8144..d996b2ea204ae1 100644 --- a/web/api/formatters/json/json.c +++ b/web/api/formatters/json/json.c @@ -270,8 +270,12 @@ void rrdr2json_v2(RRDR *r, BUFFER *wb) { buffer_json_member_add_uint64(wb, "value", 0); buffer_json_member_add_uint64(wb, "arp", 1); buffer_json_member_add_uint64(wb, "pa", 2); - if(expose_gbc) - buffer_json_member_add_uint64(wb, "count", 3); + if(expose_gbc) { + if(r->vh) + buffer_json_member_add_uint64(wb, "hidden", 3); + else + buffer_json_member_add_uint64(wb, "count", 3); + } buffer_json_object_close(wb); buffer_json_member_add_array(wb, "data"); @@ -286,6 +290,7 @@ void rrdr2json_v2(RRDR *r, BUFFER *wb) { // for each line in the array for (i = start; i != end; i += step) { NETDATA_DOUBLE *cn = &r->v[ i * r->d ]; + NETDATA_DOUBLE *ch = (r->vh) ? 
&r->vh[ i * r->d ] : NULL; RRDR_VALUE_FLAGS *co = &r->o[ i * r->d ]; NETDATA_DOUBLE *ar = &r->ar[ i * r->d ]; uint32_t *gbc = &r->gbc [ i * r->d ]; @@ -325,8 +330,12 @@ void rrdr2json_v2(RRDR *r, BUFFER *wb) { buffer_json_add_array_item_uint64(wb, o); // add the count - if(expose_gbc) - buffer_json_add_array_item_uint64(wb, gbc[d]); + if(expose_gbc) { + if(ch) + buffer_json_add_array_item_double(wb, ch[d]); + else + buffer_json_add_array_item_uint64(wb, gbc[d]); + } buffer_json_array_close(wb); // point } diff --git a/web/api/formatters/rrd2json.h b/web/api/formatters/rrd2json.h index def26c754dd1f4..ca3a41aae27054 100644 --- a/web/api/formatters/rrd2json.h +++ b/web/api/formatters/rrd2json.h @@ -87,7 +87,7 @@ int rrdset2value_api_v1( ); static inline bool rrdr_dimension_should_be_exposed(RRDR_DIMENSION_FLAGS rrdr_dim_flags, RRDR_OPTIONS options) { - if(unlikely(options & RRDR_OPTION_RETURN_RAW)) + if(unlikely((options & RRDR_OPTION_RETURN_RAW) && (rrdr_dim_flags & RRDR_DIMENSION_QUERIED))) return true; if(unlikely(rrdr_dim_flags & RRDR_DIMENSION_HIDDEN)) return false; diff --git a/web/api/netdata-swagger.yaml b/web/api/netdata-swagger.yaml index c25f0b7194c953..b050f340733929 100644 --- a/web/api/netdata-swagger.yaml +++ b/web/api/netdata-swagger.yaml @@ -241,6 +241,7 @@ paths: A comma separated list of the groupings required. All possible values can be combined together, except `selected`. If `selected` is given in the list, all others are ignored. The order they are placed in the list is currently ignored. + This parameter is also accepted as `group_by[0]` and `group_by[1]` when multiple grouping passes are required. required: false schema: type: array @@ -261,6 +262,7 @@ paths: in: query description: | A comma separated list of the label keys to group by their values. The order of the labels in the list is respected. + This parameter is also accepted as `group_by_label[0]` and `group_by_label[1]` when multiple grouping passes are required. 
required: false schema: type: string @@ -271,6 +273,7 @@ paths: description: | The aggregation function to apply when grouping metrics together. When option `raw` is given, `average` and `avg` behave like `sum` and the caller is expected to calculate the average. + This parameter is also accepted as `aggregation[0]` and `aggregation[1]` when multiple grouping passes are required. required: false schema: type: string @@ -280,6 +283,7 @@ paths: - avg - average - sum + - percentage default: average - $ref: '#/components/parameters/scopeNodes' - $ref: '#/components/parameters/scopeContexts' @@ -2741,8 +2745,13 @@ components: type: integer count: description: | - The number of metrics aggregated into this point. This exists only when the option `raw` is given to the query. + The number of metrics aggregated into this point. + This exists only when the option `raw` is given to the query and the final aggregation point is NOT `percentage`. type: integer + hidden: + description: | + The sum of the non-selected dimensions aggregated for this group item point. + This exists only when the option `raw` is given to the query and the final aggregation method is `percentage`. data: type: array items: diff --git a/web/api/queries/query.c b/web/api/queries/query.c index fe028262222ccd..93f94453c56654 100644 --- a/web/api/queries/query.c +++ b/web/api/queries/query.c @@ -2992,7 +2992,7 @@ static RRDR *rrd2rrdr_group_by_initialize(ONEWAYALLOC *owa, QUERY_TARGET *qt) { // initialize partial trimming r->partial_data_trimming.max_update_every = update_every_max; r->partial_data_trimming.expected_after = - (!(qt->window.options & RRDR_OPTION_RETURN_RAW) && + (!query_target_aggregatable(qt) && qt->window.before >= qt->window.now - update_every_max) ? 
qt->window.before - update_every_max : qt->window.before; @@ -3168,7 +3168,7 @@ static void rrdr2rrdr_group_by_partial_trimming(RRDR *r) { } static void rrdr2rrdr_group_by_calculate_percentage_of_group(RRDR *r) { - if(!r->vh) + if(!r->vh || query_target_aggregatable(r->internal.qt)) return; for(size_t i = 0; i < r->n ;i++) { @@ -3191,7 +3191,10 @@ static void rrdr2rrdr_group_by_calculate_percentage_of_group(RRDR *r) { } } -static void rrd2rrdr_convert_to_percentage(RRDR *r) { +static void rrd2rrdr_convert_values_to_percentage_of_total(RRDR *r) { + if(!(r->internal.qt->window.options & RRDR_OPTION_PERCENTAGE) || query_target_aggregatable(r->internal.qt)) + return; + size_t global_min_max_values = 0; NETDATA_DOUBLE global_min = NAN, global_max = NAN; @@ -3289,8 +3292,7 @@ static RRDR *rrd2rrdr_group_by_finalize(RRDR *r_tmp) { if(!r_tmp->group_by.r) { // v1 query - if(options & RRDR_OPTION_PERCENTAGE) - rrd2rrdr_convert_to_percentage(r_tmp); + rrd2rrdr_convert_values_to_percentage_of_total(r_tmp); return r_tmp; } // v2 query @@ -3330,7 +3332,7 @@ static RRDR *rrd2rrdr_group_by_finalize(RRDR *r_tmp) { if(qt->request.group_by[g].group_by != RRDR_GROUP_BY_NONE) aggregation = qt->request.group_by[g].aggregation; - if(!(options & RRDR_OPTION_RETURN_RAW) && r->partial_data_trimming.expected_after < qt->window.before) + if(!query_target_aggregatable(qt) && r->partial_data_trimming.expected_after < qt->window.before) rrdr2rrdr_group_by_partial_trimming(r); // apply averaging, remove RRDR_VALUE_EMPTY, find the non-zero dimensions, min and max @@ -3422,8 +3424,7 @@ static RRDR *rrd2rrdr_group_by_finalize(RRDR *r_tmp) { qt->window.options &= ~RRDR_OPTION_NONZERO; } - if(options & RRDR_OPTION_PERCENTAGE && !(options & RRDR_OPTION_RETURN_RAW)) - rrd2rrdr_convert_to_percentage(r); + rrd2rrdr_convert_values_to_percentage_of_total(r); // update query instance counts in query host and query context { From 1aef5a70d8ba6df58ae29cd320fb0f7c0eeca6f0 Mon Sep 17 00:00:00 2001 From: "Austin 
S. Hemmelgarn" Date: Tue, 30 May 2023 08:38:32 -0400 Subject: [PATCH 20/23] Split plugins to individual packages for DEB/RPM packaging. (#13927) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update and normalize plugin package descriptions. This ensures they have accurate and concise descriptions of what they do, and that the descriptions are the same for both the RPM and DEB packages. * Split NFACCT plugin to its own package. Most users do not actually use it, so make life easier for them by reducing our dependency footprint and cutting down on how much they need to download. * Split charts.d.plugin to its own package. * Split eBPF plugin and code to their own packages. The code is in a separate package to simplify handling updates for it separately from the main agent code in the future. The eBPF plugin will still be installed by default in most cases when installing the Netdata Agent * Split python.d plugin to its own package. The python.d plugin will still be installed by default in most cases when installing the Netdata Agent * Split go.d plugin to its own package. The go.d plugin will still be installed by default in most cases when installing the Netdata Agent * Split apps.plugin to its own package The apps plugin will still be installed by default in most cases when installing the Netdata Agent * Properly split out postinst scripts for DEB packages. We should be modifying permissions and filecaps for plugins in the specific packages that install those plugins. * Clean up main files section in RPM spec file. This should get rid of the duplicate files warnings, as well as probably producing a more technically correct RPM. * Properly format DEB package descriptions. They should be folded at 76 characters. * Fix dependencies for split RPM plugin packages. * Fix most duplicate file warnings for RPM builds. * Split slabinfo plugin to its own package. * Clean up RPM conditional build.
All platforms we build RPMs for have both netns and systemd support, so quit supporting platforms that don’t in our spec file. * Include loopsleepms.sh.inc in RPM package. * Fix packaging of nfacct plugin. * Skip building NFACCT plugin on RHEL clones. They lack the required dependencies. * Split perf.plugin to its own package. Also, start using CAP_PERFMON for it in RPM packages when available. * Fix typo in DEB postinstall scripts. * Fix issues with ebpf bundling scripts. They don’t need to preserve permissions, and choosing not to do so makes them work more reliably in a number of cases. * Improve dependency handling of secondary plugins. * Fix dependency handling. * Match ebpf code files more specifically in RPM spec file. * Fix branding and dependencies for RPM packages. - Change the eBPF plugin legacy code package name and description to reflect what it actually is. - Properly have the new plugin packages conflict with the older Netdata packages. This is needed for updates to work cleanly. - Only require the eBPF legacy code on older systems that need it. * Fix branding and dependencies for DEB packages. - Change the eBPF plugin legacy code package name and description to reflect what it actually is. - Properly have the new plugin packages conflict with the older Netdata packages. This is needed for updates to work cleanly. * Update docs for non-default plugins. * Have docs link back to package document. * Address review feedback. * Explicitly suggest plugins we are not including by default. This does not pull them in by default, but does make it easier for users to discover them. * Explicitly pull in default plugins on CentOS 7. * Fix broken merge of netdata spec file. * Resolve typo in kickstart script. * Explicitly disable FreeIPMI and NFACCT in RPMs if they are not available. * Fix RPM changelog. * Fix conditional plugin handling. * Fix disabling FreeIPMI on Amazon Linux. * Split new debugfs plugin to separate package. * Install debugfs plugin by default.
--- collectors/charts.d.plugin/README.md | 2 + collectors/charts.d.plugin/ap/README.md | 2 + collectors/charts.d.plugin/apcupsd/README.md | 2 + .../charts.d.plugin/libreswan/README.md | 2 + collectors/charts.d.plugin/nut/README.md | 2 + collectors/charts.d.plugin/opensips/README.md | 2 + collectors/charts.d.plugin/sensors/README.md | 6 +- collectors/nfacct.plugin/README.md | 5 + collectors/perf.plugin/README.md | 3 + collectors/slabinfo.plugin/README.md | 3 + contrib/debian/control | 129 +++++- contrib/debian/netdata-plugin-apps.postinst | 13 + .../debian/netdata-plugin-debugfs.postinst | 13 + contrib/debian/netdata-plugin-ebpf.postinst | 13 + .../debian/netdata-plugin-freeipmi.postinst | 13 + contrib/debian/netdata-plugin-go.postinst | 13 + contrib/debian/netdata-plugin-nfacct.postinst | 13 + contrib/debian/netdata-plugin-perf.postinst | 17 + .../debian/netdata-plugin-slabinfo.postinst | 13 + contrib/debian/netdata.postinst | 17 - contrib/debian/rules | 97 ++++- netdata.spec.in | 408 ++++++++++++++---- packaging/bundle-ebpf.sh | 2 +- packaging/bundle-libbpf.sh | 6 +- packaging/installer/kickstart.sh | 12 + 25 files changed, 690 insertions(+), 118 deletions(-) create mode 100644 contrib/debian/netdata-plugin-apps.postinst create mode 100644 contrib/debian/netdata-plugin-debugfs.postinst create mode 100644 contrib/debian/netdata-plugin-ebpf.postinst create mode 100644 contrib/debian/netdata-plugin-freeipmi.postinst create mode 100644 contrib/debian/netdata-plugin-go.postinst create mode 100644 contrib/debian/netdata-plugin-nfacct.postinst create mode 100644 contrib/debian/netdata-plugin-perf.postinst create mode 100644 contrib/debian/netdata-plugin-slabinfo.postinst diff --git a/collectors/charts.d.plugin/README.md b/collectors/charts.d.plugin/README.md index 3e4edf5625da0b..97c2446fa9ddab 100644 --- a/collectors/charts.d.plugin/README.md +++ b/collectors/charts.d.plugin/README.md @@ -17,6 +17,8 @@ memory, collecting data with as little overheads as possible 
`charts.d.plugin` looks for scripts in `/usr/lib/netdata/charts.d`. The scripts should have the filename suffix: `.chart.sh`. +By default, `charts.d.plugin` is not included as part of the install when using [our official native DEB/RPM packages](https://github.com/netdata/netdata/blob/master/packaging/installer/methods/packages.md). You can install it by installing the `netdata-plugin-chartsd` package. + ## Configuration `charts.d.plugin` itself can be [configured](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#use-edit-config-to-edit-configuration-files) using the configuration file `/etc/netdata/charts.d.conf`. This file is also a BASH script. diff --git a/collectors/charts.d.plugin/ap/README.md b/collectors/charts.d.plugin/ap/README.md index bc7460a28fd971..339ad13751e4af 100644 --- a/collectors/charts.d.plugin/ap/README.md +++ b/collectors/charts.d.plugin/ap/README.md @@ -85,6 +85,8 @@ Station 40:b8:37:5a:ed:5e (on wlan0) ## Configuration +If using [our official native DEB/RPM packages](https://github.com/netdata/netdata/blob/master/packaging/installer/methods/packages.md), make sure `netdata-plugin-chartsd` is installed. + Edit the `charts.d/ap.conf` configuration file using `edit-config` from the Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`. diff --git a/collectors/charts.d.plugin/apcupsd/README.md b/collectors/charts.d.plugin/apcupsd/README.md index 6934d59c03af1b..00e9697dc81c87 100644 --- a/collectors/charts.d.plugin/apcupsd/README.md +++ b/collectors/charts.d.plugin/apcupsd/README.md @@ -13,6 +13,8 @@ Monitors different APC UPS models and retrieves status information using `apcacc ## Configuration +If using [our official native DEB/RPM packages](https://github.com/netdata/netdata/blob/master/packaging/installer/methods/packages.md), make sure `netdata-plugin-chartsd` is installed. 
+ Edit the `charts.d/apcupsd.conf` configuration file using `edit-config` from the Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`. diff --git a/collectors/charts.d.plugin/libreswan/README.md b/collectors/charts.d.plugin/libreswan/README.md index a20eb86c0a2ea7..b6eeb0180ca880 100644 --- a/collectors/charts.d.plugin/libreswan/README.md +++ b/collectors/charts.d.plugin/libreswan/README.md @@ -24,6 +24,8 @@ The following charts are created, **per tunnel**: ## Configuration +If using [our official native DEB/RPM packages](https://github.com/netdata/netdata/blob/master/packaging/installer/methods/packages.md), make sure `netdata-plugin-chartsd` is installed. + Edit the `charts.d/libreswan.conf` configuration file using `edit-config` from the Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`. diff --git a/collectors/charts.d.plugin/nut/README.md b/collectors/charts.d.plugin/nut/README.md index 4488254451ede3..4608ce3e1ac39f 100644 --- a/collectors/charts.d.plugin/nut/README.md +++ b/collectors/charts.d.plugin/nut/README.md @@ -53,6 +53,8 @@ The following charts will be created: ## Configuration +If using [our official native DEB/RPM packages](https://github.com/netdata/netdata/blob/master/packaging/installer/methods/packages.md), make sure `netdata-plugin-chartsd` is installed. + Edit the `charts.d/nut.conf` configuration file using `edit-config` from the Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`. 
diff --git a/collectors/charts.d.plugin/opensips/README.md b/collectors/charts.d.plugin/opensips/README.md index c278b53a02739a..1d7322140515fd 100644 --- a/collectors/charts.d.plugin/opensips/README.md +++ b/collectors/charts.d.plugin/opensips/README.md @@ -11,6 +11,8 @@ learn_rel_path: "Integrations/Monitor/Networking" ## Configuration +If using [our official native DEB/RPM packages](https://github.com/netdata/netdata/blob/master/packaging/installer/methods/packages.md), make sure `netdata-plugin-chartsd` is installed. + Edit the `charts.d/opensips.conf` configuration file using `edit-config` from the Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`. diff --git a/collectors/charts.d.plugin/sensors/README.md b/collectors/charts.d.plugin/sensors/README.md index 2601a2b65f61b5..0dbe96225b2806 100644 --- a/collectors/charts.d.plugin/sensors/README.md +++ b/collectors/charts.d.plugin/sensors/README.md @@ -21,13 +21,15 @@ One chart for every sensor chip found and each of the above will be created. ## Enable the collector +If using [our official native DEB/RPM packages](https://github.com/netdata/netdata/blob/master/packaging/installer/methods/packages.md), make sure `netdata-plugin-chartsd` is installed. + The `sensors` collector is disabled by default. -To enable the collector, you need to edit the configuration file of `charts.d/sensors.conf`. You can do so by using the `edit config` script. +To enable the collector, you need to edit the configuration file of `charts.d/sensors.conf`. You can do so by using the `edit config` script. 
> ### Info > -> To edit configuration files in a safe way, we provide the [`edit config` script](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#use-edit-config-to-edit-configuration-files) located in your [Netdata config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory) (typically is `/etc/netdata`) that creates the proper file and opens it in an editor automatically. +> To edit configuration files in a safe way, we provide the [`edit config` script](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#use-edit-config-to-edit-configuration-files) located in your [Netdata config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory) (typically is `/etc/netdata`) that creates the proper file and opens it in an editor automatically. > It is recommended to use this way for configuring Netdata. > > Please also note that after most configuration changes you will need to [restart the Agent](https://github.com/netdata/netdata/blob/master/docs/configure/start-stop-restart.md) for the changes to take effect. diff --git a/collectors/nfacct.plugin/README.md b/collectors/nfacct.plugin/README.md index e8502236fe1873..ae6597a409f6dc 100644 --- a/collectors/nfacct.plugin/README.md +++ b/collectors/nfacct.plugin/README.md @@ -13,6 +13,11 @@ learn_rel_path: "Integrations/Monitor/Networking" ## Prerequisites +If you are using [our official native DEB/RPM packages](https://github.com/netdata/netdata/blob/master/packaging/installer/methods/packages.md), install the +`netdata-plugin-nfacct` package using your system package manager. + +If you built Netdata locally: + 1. install `libmnl-dev` and `libnetfilter-acct-dev` using the package manager of your system. 2. re-install Netdata from source. The installer will detect that the required libraries are now available and will also build `netdata.plugin`. 
diff --git a/collectors/perf.plugin/README.md b/collectors/perf.plugin/README.md index e519be9c47cf29..a8bd4b0e5ee1fd 100644 --- a/collectors/perf.plugin/README.md +++ b/collectors/perf.plugin/README.md @@ -14,6 +14,9 @@ the `perf_event_open()` system call. ## Important Notes +If you are using [our official native DEB/RPM packages](https://github.com/netdata/netdata/blob/master/packaging/installer/methods/packages.md), you will need to install +the `netdata-plugin-perf` package using your system package manager. + Accessing hardware PMUs requires root permissions, so the plugin is setuid to root. Keep in mind that the number of PMUs in a system is usually quite limited and every hardware monitoring diff --git a/collectors/slabinfo.plugin/README.md b/collectors/slabinfo.plugin/README.md index e0abaff807cacb..abcbe1e3fe926d 100644 --- a/collectors/slabinfo.plugin/README.md +++ b/collectors/slabinfo.plugin/README.md @@ -18,6 +18,9 @@ Each internal structure (process, file descriptor, inode...) is stored within a The plugin is disabled by default because it collects and displays a huge amount of metrics. To enable it set `slabinfo = yes` in the `plugins` section of the `netdata.conf` configuration file. +If you are using [our official native DEB/RPM packages](https://github.com/netdata/netdata/blob/master/packaging/installer/methods/packages.md), you will additionally need to install the `netdata-plugin-slabinfo` +package using your system package manager. + There is currently no configuration needed for the plugin itself. As `/proc/slabinfo` is only readable by root, this plugin is setuid root. 
diff --git a/contrib/debian/control b/contrib/debian/control index eeeb8d25c67639..f4a767b3207896 100644 --- a/contrib/debian/control +++ b/contrib/debian/control @@ -42,6 +42,17 @@ Conflicts: netdata-core, netdata-plugins-bash, netdata-plugins-python, netdata-web +Recommends: netdata-plugin-ebpf, + netdata-plugin-apps, + netdata-plugin-pythond, + netdata-plugin-go, + netdata-plugin-debugfs +Suggests: netdata-plugin-cups, + netdata-plugin-freeipmi, + netdata-plugin-nfacct, + netdata-plugin-chartsd, + netdata-plugin-slabinfo, + netdata-plugin-perf Pre-Depends: dpkg (>= 1.17.14) Description: real-time charts for system monitoring Netdata is a daemon that collects data in realtime (per second) @@ -54,14 +65,120 @@ Architecture: any Depends: cups, ${shlibs:Depends}, netdata (>= ${source:Version}) -Description: The Common Unix Printing System plugin for metrics collection from cupsd +Description: The CUPS metrics collection plugin for the Netdata Agent + This plugin allows the Netdata Agent to collect metrics from the Common + UNIX Printing System. Package: netdata-plugin-freeipmi Architecture: any Depends: freeipmi, ${shlibs:Depends}, - netdata (= ${source:Version}) -Description: FreeIPMI - The Intelligent Platform Management System. - The IPMI specification defines a set of interfaces for platform management. - It is implemented by a number vendors for system management. The features of IPMI that most users will be interested in - are sensor monitoring, system event monitoring, power control, and serial-over-LAN (SOL). + netdata (>= ${source:Version}) +Description: The FreeIPMI metrics collection plugin for the Netdata Agent + This plugin allows the Netdata Agent to collect metrics from hardware + using FreeIPMI. 
+ +Package: netdata-plugin-nfacct +Architecture: any +Depends: ${shlibs:Depends}, + netdata (>= ${source:Version}) +Conflicts: netdata (< ${source:Version}) +Description: The NFACCT metrics collection plugin for the Netdata Agent + This plugin allows the Netdata Agent to collect metrics from the firewall + using NFACCT objects. + +Package: netdata-plugin-chartsd +Architecture: all +Depends: bash, + netdata (>= ${source:Version}) +Conflicts: netdata (< ${source:Version}) +Suggests: apcupsd, nut, iw, sudo +Description: The charts.d metrics collection plugin for the Netdata Agent + This plugin adds a selection of additional collectors written in shell + script to the Netdata Agent. It includes collectors for NUT, APCUPSD, + LibreSWAN, OpenSIPS, and Wireless access point statistics. + +Package: netdata-plugin-ebpf +Architecture: any +Depends: ${shlibs:Depends}, + netdata (>= ${source:Version}) +Recommends: netdata-ebpf-code-legacy (>= ${source:Version}), + netdata-plugin-apps (>= ${source:Version}) +Conflicts: netdata (< ${source:Version}) +Description: The eBPF metrics collection plugin for the Netdata Agent + This plugin allows the Netdata Agent to use eBPF code to collect more + detailed kernel-level metrics for the system. + +Package: netdata-ebpf-code-legacy +Architecture: i386 amd64 +Depends: netdata-plugin-ebpf (= ${source:Version}) +Conflicts: netdata (< ${source:Version}) +Description: Compiled eBPF legacy code for the Netdata eBPF plugin + This package provides the pre-compiled eBPF legacy code for use by + the Netdata eBPF plugin. This code is only needed when using the eBPF + plugin with kernels that do not include BTF support (mostly kernel + versions lower than 5.10).
+ +Package: netdata-plugin-pythond +Architecture: all +Depends: ${shlibs:Depends}, + netdata (>= ${source:Version}) +Suggests: sudo +Conflicts: netdata (< ${source:Version}) +Description: The python.d metrics collection plugin for the Netdata Agent + Many of the collectors provided by this package are also available + in netdata-plugin-go. In most cases, you probably want to use those + versions instead of the Python versions. + +Package: netdata-plugin-go +Architecture: any +Depends: ${shlibs:Depends}, + libcap2-bin, + netdata (>= ${source:Version}) +Suggests: nvme-cli, sudo +Conflicts: netdata (< ${source:Version}) +Description: The go.d metrics collection plugin for the Netdata Agent + This plugin adds a selection of additional collectors written in Go to + the Netdata Agent. A significant percentage of the application specific + collectors provided by Netdata are part of this plugin, so most users + will want it installed. + +Package: netdata-plugin-apps +Architecture: any +Depends: ${shlibs:Depends}, + libcap2-bin, + netdata (>= ${source:Version}) +Conflicts: netdata (< ${source:Version}) +Description: The per-application metrics collection plugin for the Netdata Agent + This plugin allows the Netdata Agent to collect per-application and + per-user metrics without using cgroups. + +Package: netdata-plugin-slabinfo +Architecture: any +Depends: ${shlibs:Depends}, + libcap2-bin, + netdata (>= ${source:Version}) +Conflicts: netdata (< ${source:Version}) +Description: The slabinfo metrics collector for the Netdata Agent + This plugin allows the Netdata Agent to collect performance and + utilization metrics for the Linux kernel’s SLAB allocator. + +Package: netdata-plugin-perf +Architecture: any +Depends: ${shlibs:Depends}, + libcap2-bin, + netdata (>= ${source:Version}) +Conflicts: netdata (< ${source:Version}) +Description: The perf metrics collector for the Netdata Agent + This plugin allows the Netdata Agent to collect metrics from the Linux perf + subsystem.
+ +Package: netdata-plugin-debugfs +Architecture: any +Depends: ${shlibs:Depends}, + libcap2-bin, + netdata (>= ${source:Version}) +Conflicts: netdata (< ${source:Version}) +Description: The debugfs metrics collector for the Netdata Agent + This plugin allows the Netdata Agent to collect Linux kernel metrics + exposed through debugfs. diff --git a/contrib/debian/netdata-plugin-apps.postinst b/contrib/debian/netdata-plugin-apps.postinst new file mode 100644 index 00000000000000..c224edd9216957 --- /dev/null +++ b/contrib/debian/netdata-plugin-apps.postinst @@ -0,0 +1,13 @@ +#!/bin/sh + +set -e + +case "$1" in + configure|reconfigure) + setcap "cap_dac_read_search=eip cap_sys_ptrace=eip" /usr/libexec/netdata/plugins.d/apps.plugin + ;; +esac + +#DEBHELPER# + +exit 0 diff --git a/contrib/debian/netdata-plugin-debugfs.postinst b/contrib/debian/netdata-plugin-debugfs.postinst new file mode 100644 index 00000000000000..f4b04406c0342c --- /dev/null +++ b/contrib/debian/netdata-plugin-debugfs.postinst @@ -0,0 +1,13 @@ +#!/bin/sh + +set -e + +case "$1" in + configure|reconfigure) + setcap "cap_dac_read_search=eip" /usr/libexec/netdata/plugins.d/debugfs.plugin + ;; +esac + +#DEBHELPER# + +exit 0 diff --git a/contrib/debian/netdata-plugin-ebpf.postinst b/contrib/debian/netdata-plugin-ebpf.postinst new file mode 100644 index 00000000000000..412bc82e818ffd --- /dev/null +++ b/contrib/debian/netdata-plugin-ebpf.postinst @@ -0,0 +1,13 @@ +#!/bin/sh + +set -e + +case "$1" in + configure|reconfigure) + chmod -f 4750 /usr/libexec/netdata/plugins.d/ebpf.plugin + ;; +esac + +#DEBHELPER# + +exit 0 diff --git a/contrib/debian/netdata-plugin-freeipmi.postinst b/contrib/debian/netdata-plugin-freeipmi.postinst new file mode 100644 index 00000000000000..303f77d6176909 --- /dev/null +++ b/contrib/debian/netdata-plugin-freeipmi.postinst @@ -0,0 +1,13 @@ +#!/bin/sh + +set -e + +case "$1" in + configure|reconfigure) + chmod -f 4750 /usr/libexec/netdata/plugins.d/freeipmi.plugin + ;; +esac +
+#DEBHELPER# + +exit 0 diff --git a/contrib/debian/netdata-plugin-go.postinst b/contrib/debian/netdata-plugin-go.postinst new file mode 100644 index 00000000000000..90888c4df4e6b6 --- /dev/null +++ b/contrib/debian/netdata-plugin-go.postinst @@ -0,0 +1,13 @@ +#!/bin/sh + +set -e + +case "$1" in + configure|reconfigure) + setcap "cap_net_admin=eip cap_net_raw=eip" /usr/libexec/netdata/plugins.d/go.d.plugin + ;; +esac + +#DEBHELPER# + +exit 0 diff --git a/contrib/debian/netdata-plugin-nfacct.postinst b/contrib/debian/netdata-plugin-nfacct.postinst new file mode 100644 index 00000000000000..d9c8671ae3d820 --- /dev/null +++ b/contrib/debian/netdata-plugin-nfacct.postinst @@ -0,0 +1,13 @@ +#!/bin/sh + +set -e + +case "$1" in + configure|reconfigure) + chmod -f 4750 /usr/libexec/netdata/plugins.d/nfacct.plugin + ;; +esac + +#DEBHELPER# + +exit 0 diff --git a/contrib/debian/netdata-plugin-perf.postinst b/contrib/debian/netdata-plugin-perf.postinst new file mode 100644 index 00000000000000..731e1aaf62b6a6 --- /dev/null +++ b/contrib/debian/netdata-plugin-perf.postinst @@ -0,0 +1,17 @@ +#!/bin/sh + +set -e + +case "$1" in + configure|reconfigure) + if capsh --supports=cap_perfmon 2>/dev/null; then + setcap cap_perfmon+ep /usr/libexec/netdata/plugins.d/perf.plugin + else + setcap cap_sys_admin+ep /usr/libexec/netdata/plugins.d/perf.plugin + fi + ;; +esac + +#DEBHELPER# + +exit 0 diff --git a/contrib/debian/netdata-plugin-slabinfo.postinst b/contrib/debian/netdata-plugin-slabinfo.postinst new file mode 100644 index 00000000000000..3a66044662c204 --- /dev/null +++ b/contrib/debian/netdata-plugin-slabinfo.postinst @@ -0,0 +1,13 @@ +#!/bin/sh + +set -e + +case "$1" in + configure|reconfigure) + setcap "cap_dac_read_search=eip" /usr/libexec/netdata/plugins.d/slabinfo.plugin + ;; +esac + +#DEBHELPER# + +exit 0 diff --git a/contrib/debian/netdata.postinst b/contrib/debian/netdata.postinst index 07b5b0eb5984ed..d5c84dc5d0070c 100644 --- a/contrib/debian/netdata.postinst +++
b/contrib/debian/netdata.postinst @@ -59,28 +59,11 @@ case "$1" in dpkg-statoverride --force --update --add root netdata 0775 /var/lib/netdata/registry > /dev/null 2>&1 chown -R root:netdata /usr/libexec/netdata/plugins.d - setcap cap_dac_read_search,cap_sys_ptrace+ep /usr/libexec/netdata/plugins.d/apps.plugin - setcap cap_dac_read_search+ep /usr/libexec/netdata/plugins.d/slabinfo.plugin setcap cap_dac_read_search+ep /usr/libexec/netdata/plugins.d/debugfs.plugin - if capsh --supports=cap_perfmon 2>/dev/null; then - setcap cap_perfmon+ep /usr/libexec/netdata/plugins.d/perf.plugin - else - setcap cap_sys_admin+ep /usr/libexec/netdata/plugins.d/perf.plugin - fi - - if [ -f "/usr/libexec/netdata/plugins.d/go.d.plugin" ]; then - setcap "cap_net_admin+epi cap_net_raw=eip" /usr/libexec/netdata/plugins.d/go.d.plugin - fi - chmod 4750 /usr/libexec/netdata/plugins.d/cgroup-network - chmod 4750 /usr/libexec/netdata/plugins.d/nfacct.plugin - - # Workaround if system does not have ebpf.plugin - chmod -f 4750 /usr/libexec/netdata/plugins.d/ebpf.plugin || true # Workaround for other plugins not installed directly by this package - chmod -f 4750 /usr/libexec/netdata/plugins.d/freeipmi.plugin || true chmod -f 4750 /usr/libexec/netdata/plugins.d/ioping || true ;; diff --git a/contrib/debian/rules b/contrib/debian/rules index 3d458654b8ad7c..626b5a69ff4e1e 100755 --- a/contrib/debian/rules +++ b/contrib/debian/rules @@ -60,6 +60,67 @@ override_dh_install: mv -f $(TEMPTOP)/usr/libexec/netdata/plugins.d/freeipmi.plugin \ $(TOP)-plugin-freeipmi/usr/libexec/netdata/plugins.d/freeipmi.plugin + # Add NFACCT plugin install rules + # + mkdir -p $(TOP)-plugin-nfacct/usr/libexec/netdata/plugins.d + mv -f $(TEMPTOP)/usr/libexec/netdata/plugins.d/nfacct.plugin \ + $(TOP)-plugin-nfacct/usr/libexec/netdata/plugins.d/nfacct.plugin + + # Add charts.d plugin install rules + # + mkdir -p $(TOP)-plugin-chartsd/usr/libexec/netdata/plugins.d/ + mkdir -p $(TOP)-plugin-chartsd/usr/lib/netdata/conf.d/ +
mv -f $(TEMPTOP)/usr/libexec/netdata/plugins.d/charts.d.plugin \ + $(TOP)-plugin-chartsd/usr/libexec/netdata/plugins.d/charts.d.plugin + mv -f $(TEMPTOP)/usr/libexec/netdata/plugins.d/charts.d.dryrun-helper.sh \ + $(TOP)-plugin-chartsd/usr/libexec/netdata/plugins.d/charts.d.dryrun-helper.sh + mv -f $(TEMPTOP)/usr/libexec/netdata/charts.d \ + $(TOP)-plugin-chartsd/usr/libexec/netdata/charts.d + mv -f $(TEMPTOP)/usr/lib/netdata/conf.d/charts.d.conf \ + $(TOP)-plugin-chartsd/usr/lib/netdata/conf.d/charts.d.conf + mv -f $(TEMPTOP)/usr/lib/netdata/conf.d/charts.d \ + $(TOP)-plugin-chartsd/usr/lib/netdata/conf.d/charts.d + + # Add ebpf plugin install rules (skipped, without failing the recipe, unless HAVE_EBPF is 1) + if [ $(HAVE_EBPF) -eq 1 ]; then mkdir -p $(TOP)-plugin-ebpf/usr/libexec/netdata/plugins.d/; fi + if [ $(HAVE_EBPF) -eq 1 ]; then mkdir -p $(TOP)-plugin-ebpf/usr/lib/netdata/conf.d/; fi + if [ $(HAVE_EBPF) -eq 1 ]; then mv -f $(TEMPTOP)/usr/libexec/netdata/plugins.d/ebpf.plugin \ + $(TOP)-plugin-ebpf/usr/libexec/netdata/plugins.d/ebpf.plugin; fi + if [ $(HAVE_EBPF) -eq 1 ]; then mv -f $(TEMPTOP)/usr/lib/netdata/conf.d/ebpf.d.conf \ + $(TOP)-plugin-ebpf/usr/lib/netdata/conf.d/ebpf.d.conf; fi + if [ $(HAVE_EBPF) -eq 1 ]; then mv -f $(TEMPTOP)/usr/lib/netdata/conf.d/ebpf.d \ + $(TOP)-plugin-ebpf/usr/lib/netdata/conf.d/ebpf.d; fi + + # Add python plugin install rules + mkdir -p $(TOP)-plugin-pythond/usr/libexec/netdata/plugins.d/ + mkdir -p $(TOP)-plugin-pythond/usr/lib/netdata/conf.d/ + mv -f $(TEMPTOP)/usr/libexec/netdata/plugins.d/python.d.plugin \ + $(TOP)-plugin-pythond/usr/libexec/netdata/plugins.d/python.d.plugin + mv -f $(TEMPTOP)/usr/libexec/netdata/python.d \ + $(TOP)-plugin-pythond/usr/libexec/netdata/python.d + mv -f $(TEMPTOP)/usr/lib/netdata/conf.d/python.d.conf \ + $(TOP)-plugin-pythond/usr/lib/netdata/conf.d/python.d.conf + mv -f $(TEMPTOP)/usr/lib/netdata/conf.d/python.d \ + $(TOP)-plugin-pythond/usr/lib/netdata/conf.d/python.d + + # Add apps plugin install rules + mkdir -p $(TOP)-plugin-apps/usr/libexec/netdata/plugins.d/ + mkdir -p
$(TOP)-plugin-apps/usr/lib/netdata/conf.d/ + mv -f $(TEMPTOP)/usr/libexec/netdata/plugins.d/apps.plugin \ + $(TOP)-plugin-apps/usr/libexec/netdata/plugins.d/apps.plugin + mv -f $(TEMPTOP)/usr/lib/netdata/conf.d/apps_groups.conf \ + $(TOP)-plugin-apps/usr/lib/netdata/conf.d/apps_groups.conf + + # Add slabinfo plugin install rules + mkdir -p $(TOP)-plugin-slabinfo/usr/libexec/netdata/plugins.d/ + mv -f $(TEMPTOP)/usr/libexec/netdata/plugins.d/slabinfo.plugin \ + $(TOP)-plugin-slabinfo/usr/libexec/netdata/plugins.d/slabinfo.plugin + + # Add perf plugin install rules + mkdir -p $(TOP)-plugin-perf/usr/libexec/netdata/plugins.d/ + mv -f $(TEMPTOP)/usr/libexec/netdata/plugins.d/perf.plugin \ + $(TOP)-plugin-perf/usr/libexec/netdata/plugins.d/perf.plugin + # Set the rest of the software in the main package # cp -rp $(TEMPTOP)/usr $(TOP) @@ -82,13 +143,18 @@ override_dh_install: ln -s "/usr/share/netdata/www/$$D" "$(TOP)/var/lib/netdata/www/$$D"; \ done - if [ $(HAVE_EBPF) -eq 1 ]; then \ - packaging/bundle-ebpf.sh . ${TOP}/usr/libexec/netdata/plugins.d; \ - fi + # Handle eBPF code + # Staged for the netdata-ebpf-code-legacy package; skipped unless HAVE_EBPF is 1. + if [ $(HAVE_EBPF) -eq 1 ]; then mkdir -p $(TOP)-ebpf-code-legacy/usr/libexec/netdata/plugins.d/; fi + if [ $(HAVE_EBPF) -eq 1 ]; then packaging/bundle-ebpf.sh .
${TOP}-ebpf-code-legacy/usr/libexec/netdata/plugins.d/; fi - # Install go + # Install go to its own package directory # - debian/install_go.sh $$(cat ${CURDIR}/packaging/go.d.version) $(TOP)/usr/lib/netdata $(TOP)/usr/libexec/netdata + mkdir -p $(TOP)-plugin-go/usr/lib/netdata/conf.d + mkdir -p $(TOP)-plugin-go/usr/libexec/netdata/plugins.d + debian/install_go.sh $$(cat ${CURDIR}/packaging/go.d.version) \ + $(TOP)-plugin-go/usr/lib/netdata \ + $(TOP)-plugin-go/usr/libexec/netdata override_dh_installdocs: dh_installdocs @@ -109,14 +175,22 @@ override_dh_fixperms: # chmod 0755 $(TOP)/usr/libexec/netdata/netdata-updater.sh + # debugfs plugin + chmod 0750 $(TOP)/usr/libexec/netdata/plugins.d/debugfs.plugin + # apps.plugin should only be runnable by the netdata user. It will be # given extra capabilities in the postinst script. # - chmod 0750 $(TOP)/usr/libexec/netdata/plugins.d/apps.plugin - chmod 0750 $(TOP)/usr/libexec/netdata/plugins.d/debugfs.plugin - chmod 0750 $(TOP)/usr/libexec/netdata/plugins.d/perf.plugin - chmod 0750 $(TOP)/usr/libexec/netdata/plugins.d/slabinfo.plugin - chmod 0750 $(TOP)/usr/libexec/netdata/plugins.d/go.d.plugin + chmod 0750 $(TOP)-plugin-apps/usr/libexec/netdata/plugins.d/apps.plugin + + # slabinfo package + chmod 0750 $(TOP)-plugin-slabinfo/usr/libexec/netdata/plugins.d/slabinfo.plugin + + # perf package + chmod 0750 $(TOP)-plugin-perf/usr/libexec/netdata/plugins.d/perf.plugin + + # Go plugin package + chmod 0750 $(TOP)-plugin-go/usr/libexec/netdata/plugins.d/go.d.plugin # CUPS plugin package chmod 0750 $(TOP)-plugin-cups/usr/libexec/netdata/plugins.d/cups.plugin @@ -124,6 +198,9 @@ override_dh_fixperms: # freeIPMI plugin package chmod 4750 $(TOP)-plugin-freeipmi/usr/libexec/netdata/plugins.d/freeipmi.plugin + # NFACCT plugin package + chmod 4750 $(TOP)-plugin-nfacct/usr/libexec/netdata/plugins.d/nfacct.plugin + override_dh_installlogrotate: cp system/logrotate/netdata debian/netdata.logrotate dh_installlogrotate diff --git a/netdata.spec.in
b/netdata.spec.in index 98fdbf71fad3cd..426d105f26a53c 100644 --- a/netdata.spec.in +++ b/netdata.spec.in @@ -27,20 +27,6 @@ AutoReqProv: yes %global _have_ebpf 0 %endif -# Disable FreeIPMI on Amazon Linux -%if 0%{?amzn} -%global _have_freeipmi 0 -%else -%global _have_freeipmi 1 -%endif - -# Disable the NFACCT plugin on Amazon Linux -%if 0%{?amzn} -%global _have_nfacct 0 -%else -%global _have_nfacct 1 -%endif - # Mitigate the cross-distro mayhem by strictly defining the libexec destination %define _prefix /usr %define _sysconfdir /etc @@ -54,13 +40,18 @@ AutoReqProv: yes # Redefine centos_ver to standardize on a single macro %{?rhel:%global centos_ver %rhel} -# -# Conditional build: -%bcond_without netns # build with netns support (cgroup-network) +# Disable FreeIPMI on Amazon Linux 2023 and newer +%if 0%{?amzn} >= 2023 +%global _have_freeipmi 0 +%else +%global _have_freeipmi 1 +%endif -%if 0%{?fedora} || 0%{?rhel} >= 7 || 0%{?suse_version} >= 1140 +# Disable NFACCT for RHEL equivalents and Amazon Linux +%if 0%{?centos_ver} || 0%{?amzn} +%global _have_nfacct 0 %else -%undefine with_netns +%global _have_nfacct 1 %endif Summary: Real-time performance monitoring, done right! @@ -150,33 +141,49 @@ Requires(pre): /usr/sbin/groupadd Requires(pre): /usr/sbin/useradd # ##################################################################### -# Functionality-dependent package dependencies +# External plugin package dependencies # ##################################################################### -# Note: Some or all of the Packages may be found in the EPEL repo, -# rather than the standard ones - -# epbf dependencies +# CentOS prior to CentOS 8 does not have a new enough version of RPM +# to support weak dependencies. Explicitly requiring our default plugins +# makes it impossible to properly test the packages prior to upload, +# so we just skip depending on them on CentOS 7. 
+%if 0%{?centos_ver} != 7 %if 0%{?_have_ebpf} -%if 0%{?suse_version} -BuildRequires: libelf-devel -%else -BuildRequires: elfutils-libelf-devel +Recommends: netdata-plugin-ebpf %endif +Recommends: netdata-plugin-apps +Recommends: netdata-plugin-pythond +Recommends: netdata-plugin-go +Recommends: netdata-plugin-debugfs +%if 0%{?_have_freeipmi} +Suggests: netdata-plugin-freeipmi %endif -# end - ebpf dependencies +%if 0%{?_have_nfacct} +Suggests: netdata-plugin-nfacct +%endif +Suggests: netdata-plugin-cups +Suggests: netdata-plugin-chartsd +Suggests: netdata-plugin-slabinfo +Suggests: netdata-plugin-perf +%endif + +# ##################################################################### +# Functionality-dependent package dependencies +# ##################################################################### +# Note: Some or all of the Packages may be found in the EPEL repo, +# rather than the standard ones # nfacct plugin dependencies -%if %{_have_nfacct} + +%if 0%{?_have_nfacct} BuildRequires: libmnl-devel -%if 0%{?fedora} || 0%{?suse_version} >= 1140 BuildRequires: libnetfilter_acct-devel %endif -%endif # end nfacct plugin dependencies # freeipmi plugin dependencies -%if %{_have_freeipmi} +%if 0%{?_have_freeipmi} BuildRequires: freeipmi-devel %endif # end - freeipmi plugin dependencies @@ -234,6 +241,12 @@ autoreconf -ivf %if 0%{!?_have_ebpf} --disable-ebpf %endif + %if 0%{!?_have_freeipmi} + --disable-plugin-freeipmi + %endif + %if 0%{!?_have_nfacct} + --disable-plugin-nfacct + %endif %if 0%{?centos_ver:1} %if %{centos_ver} < 8 --with-bundled-protobuf \ @@ -272,7 +285,7 @@ install -m 644 -p system/logrotate/netdata "${RPM_BUILD_ROOT}%{_sysconfdir}/logr # ########################################################### # Install freeipmi -%if %{_have_freeipmi} +%if 0%{?_have_freeipmi} install -m 4750 -p freeipmi.plugin "${RPM_BUILD_ROOT}%{_libexecdir}/%{name}/plugins.d/freeipmi.plugin" %endif @@ -313,6 +326,11 @@ install -m 755 -d 
"${RPM_BUILD_ROOT}%{_localstatedir}/log/%{name}" # Install registry directory install -m 755 -d "${RPM_BUILD_ROOT}%{_localstatedir}/lib/%{name}/registry" +# ########################################################### +# Install uninstaller script +install -m 750 -p packaging/installer/netdata-uninstaller.sh \ + "${RPM_BUILD_ROOT}%{_libexecdir}/%{name}/netdata-uninstaller.sh" + # ########################################################### # Install netdata service install -m 755 -d "${RPM_BUILD_ROOT}%{_unitdir}" @@ -461,14 +479,13 @@ rm -rf "${RPM_BUILD_ROOT}" %files %doc README.md -%{_sysconfdir}/%{name} %config(noreplace) %{_sysconfdir}/%{name}/netdata.conf +%attr(0755,root,netdata) %{_sysconfdir}/%{name}/edit-config +%attr(0644,root,netdata) %{_sysconfdir}/%{name}/.install-type +%dir %{_sysconfdir}/%{name}/health.d +%dir %{_sysconfdir}/%{name}/statsd.d %config(noreplace) %{_sysconfdir}/logrotate.d/%{name} -%dir %{_libdir}/%{name} -%dir %{_datadir}/%{name} %{_libdir}/%{name} -%{_libdir}/%{name}/conf.d/ -%{_libexecdir}/%{name} %{_sbindir}/%{name} %{_sbindir}/netdatacli %{_sbindir}/netdata-claim.sh @@ -476,43 +493,33 @@ rm -rf "${RPM_BUILD_ROOT}" %{_unitdir}/netdata.service %{_presetdir}/50-netdata.preset -%defattr(0750,root,netdata,0750) - -%dir %{_libexecdir}/%{name}/python.d -%dir %{_libexecdir}/%{name}/charts.d +%dir %{_libexecdir}/%{name} %dir %{_libexecdir}/%{name}/plugins.d +%defattr(0750,root,netdata,0750) +%{_libexecdir}/%{name}/install-service.sh +%{_libexecdir}/%{name}/netdata-updater.sh +%{_libexecdir}/%{name}/netdata-uninstaller.sh +%{_libexecdir}/%{name}/plugins.d/acl.sh +%{_libexecdir}/%{name}/plugins.d/alarm.sh +%{_libexecdir}/%{name}/plugins.d/alarm-email.sh +%{_libexecdir}/%{name}/plugins.d/alarm-notify.sh +%{_libexecdir}/%{name}/plugins.d/alarm-test.sh +%{_libexecdir}/%{name}/plugins.d/anonymous-statistics.sh +%{_libexecdir}/%{name}/plugins.d/cgroup-name.sh +%{_libexecdir}/%{name}/plugins.d/get-kubernetes-labels.sh 
+%{_libexecdir}/%{name}/plugins.d/health-cmdapi-test.sh +%{_libexecdir}/%{name}/plugins.d/ioping.plugin +%{_libexecdir}/%{name}/plugins.d/loopsleepms.sh.inc +%{_libexecdir}/%{name}/plugins.d/request.sh +%{_libexecdir}/%{name}/plugins.d/system-info.sh +%{_libexecdir}/%{name}/plugins.d/tc-qos-helper.sh +%{_libexecdir}/%{name}/plugins.d/template_dim.sh -%{_libexecdir}/%{name}/python.d -%{_libexecdir}/%{name}/plugins.d - -%caps(cap_dac_read_search,cap_sys_ptrace=ep) %attr(0750,root,netdata) %{_libexecdir}/%{name}/plugins.d/apps.plugin - -%if %{with netns} # cgroup-network detects the network interfaces of CGROUPs # it must be able to use setns() and run cgroup-network-helper.sh as root # the helper script reads /proc/PID/fdinfo/* files, runs virsh, etc. %attr(4750,root,netdata) %{_libexecdir}/%{name}/plugins.d/cgroup-network %attr(0750,root,netdata) %{_libexecdir}/%{name}/plugins.d/cgroup-network-helper.sh -%endif - -# ebpf plugin -%if 0%{?_have_ebpf} -%attr(4750,root,netdata) %{_libexecdir}/%{name}/plugins.d/ebpf.plugin -%endif - -# perf plugin -# This should be CAP_PERFMON once RPM finally learns about it, but needs to be CAP_SYS_ADMIN for now. 
-# %caps(cap_perfmon=ep) %attr(0750,root,netdata) %{_libexecdir}/%{name}/plugins.d/perf.plugin -%caps(cap_sys_admin=ep) %attr(0750,root,netdata) %{_libexecdir}/%{name}/plugins.d/perf.plugin - -# debugfs plugin -%caps(cap_dac_read_search=ep) %attr(0750,root,netdata) %{_libexecdir}/%{name}/plugins.d/debugfs.plugin - -# slabinfo plugin -%caps(cap_dac_read_search=ep) %attr(0750,root,netdata) %{_libexecdir}/%{name}/plugins.d/slabinfo.plugin - -# go.d.plugin (the capability required for wireguard module) -%caps(cap_net_admin,cap_net_raw=eip) %{_libexecdir}/%{name}/plugins.d/go.d.plugin # Enforce 0644 for files and 0755 for directories # for the netdata web directory @@ -528,45 +535,287 @@ rm -rf "${RPM_BUILD_ROOT}" %attr(0770,netdata,netdata) %dir %{_localstatedir}/lib/%{name}/registry # Free IPMI belongs to a different sub-package -%if %{_have_freeipmi} +%if 0%{?_have_freeipmi} %exclude %{_libexecdir}/%{name}/plugins.d/freeipmi.plugin %endif +# NFACCT belongs to a different sub-package +%if 0%{?_have_nfacct} +%exclude %{_libexecdir}/%{name}/plugins.d/nfacct.plugin +%endif + +# Charts.d belongs to a different sub-package +%exclude %{_libexecdir}/%{name}/plugins.d/charts.d.plugin +%exclude %{_libexecdir}/%{name}/plugins.d/charts.d.dryrun-helper.sh +%exclude %{_libexecdir}/%{name}/charts.d/ +%exclude %{_libdir}/%{name}/conf.d/charts.d.conf +%exclude %{_libdir}/%{name}/conf.d/charts.d/ + +# eBPF belongs to a different sub-package +%if 0%{?_have_ebpf} +%exclude %{_libexecdir}/%{name}/plugins.d/ebpf.plugin +%exclude %{_libdir}/%{name}/conf.d/ebpf.d.conf +%exclude %{_libdir}/%{name}/conf.d/ebpf.d +%exclude %{_libexecdir}/%{name}/plugins.d/ebpf.d +%endif + +# Python.d belongs to a different sub-package +%exclude %{_libexecdir}/%{name}/plugins.d/python.d.plugin +%exclude %{_libexecdir}/%{name}/python.d +%exclude %{_libdir}/%{name}/conf.d/python.d.conf +%exclude %{_libdir}/%{name}/conf.d/python.d + +# Go.d belongs to a different sub-package +%exclude 
%{_libexecdir}/%{name}/plugins.d/go.d.plugin +%exclude %{_libdir}/%{name}/conf.d/go.d.conf +%exclude %{_libdir}/%{name}/conf.d/go.d + +# apps belongs to a different sub-package +%exclude %{_libexecdir}/%{name}/plugins.d/apps.plugin +%exclude %{_libdir}/%{name}/conf.d/apps_groups.conf + +# slabinfo belongs to a different sub-package +%exclude %{_libexecdir}/%{name}/plugins.d/slabinfo.plugin + +# perf belongs to a different sub-package +%exclude %{_libexecdir}/%{name}/plugins.d/perf.plugin + # CUPS belongs to a different sub package %if 0%{?centos_ver} != 6 && 0%{?centos_ver} != 7 %exclude %{_libexecdir}/%{name}/plugins.d/cups.plugin %package plugin-cups -Summary: The Common Unix Printing System plugin for netdata +Summary: The CUPS metrics collection plugin for the Netdata Agent Group: Applications/System Requires: cups >= 1.7 Requires: netdata = %{version} %description plugin-cups - This is the Common Unix Printing System plugin for the netdata daemon. -Use this plugin to enable metrics collection from cupsd, the daemon running when CUPS is enabled on the system + This plugin allows the Netdata Agent to collect metrics from the Common UNIX Printing System. %files plugin-cups %attr(0750,root,netdata) %{_libexecdir}/%{name}/plugins.d/cups.plugin %endif -%if %{_have_freeipmi} +%if 0%{?_have_freeipmi} %package plugin-freeipmi -Summary: FreeIPMI - The Intelligent Platform Management System +Summary: The FreeIPMI metrics collection plugin for the Netdata Agent Group: Applications/System Requires: freeipmi Requires: netdata = %{version} %description plugin-freeipmi - The IPMI specification defines a set of interfaces for platform management. -It is implemented by a number vendors for system management. The features of IPMI that most users will be interested in -are sensor monitoring, system event monitoring, power control, and serial-over-LAN (SOL). + This plugin allows the Netdata Agent to collect metrics from hardware using FreeIPMI. 
%files plugin-freeipmi %attr(4750,root,netdata) %{_libexecdir}/%{name}/plugins.d/freeipmi.plugin %endif +%if 0%{?_have_nfacct} +%package plugin-nfacct +Summary: The NFACCT metrics collection plugin for the Netdata Agent +Group: Applications/System +Requires: libmnl +Requires: netdata = %{version} +Conflicts: netdata <= %{version} +%if 0%{?fedora} || 0%{?suse_version} >= 1140 +Requires: libnetfilter_acct +%endif + +%description plugin-nfacct + This plugin allows the Netdata Agent to collect metrics from the firewall using NFACCT objects. + +%files plugin-nfacct +%attr(4750,root,netdata) %{_libexecdir}/%{name}/plugins.d/nfacct.plugin +%endif + +%package plugin-chartsd +Summary: The charts.d metrics collection plugin for the Netdata Agent +Group: Applications/System +Requires: bash +Requires: netdata = %{version} +Conflicts: netdata <= %{version} +%if 0%{?centos_ver} != 7 +Suggests: nut +Suggests: apcupsd +Suggests: iw +Suggests: sudo +%endif + +%description plugin-chartsd + This plugin adds a selection of additional collectors written in shell script to the Netdata Agent. +It includes collectors for NUT, APCUPSD, LibreSWAN, OpenSIPS, and Wireless access point statistics. 
+ +%files plugin-chartsd +%defattr(0750,root,netdata,0750) +%{_libexecdir}/%{name}/plugins.d/charts.d.plugin +%{_libexecdir}/%{name}/plugins.d/charts.d.dryrun-helper.sh +%{_libexecdir}/%{name}/charts.d/ +%defattr(0644,root,netdata,0644) +%{_libdir}/%{name}/conf.d/charts.d.conf +%{_libdir}/%{name}/conf.d/charts.d/ + +%if 0%{?_have_ebpf} +%package plugin-ebpf +Summary: The eBPF metrics collection plugin for the Netdata Agent +Group: Applications/System +Requires: netdata = %{version} +Conflicts: netdata <= %{version} +%if 0%{?centos_ver} != 7 +Recommends: netdata-plugin-apps = %{version} +Recommends: netdata-ebpf-legacy-code >= %{version} +%else +Requires: netdata-plugin-apps = %{version} +Requires: netdata-ebpf-legacy-code >= %{version} +%endif + +%description plugin-ebpf + This plugin allows the Netdata Agent to use eBPF code to collect more detailed kernel-level metrics for the system. + +%files plugin-ebpf +%defattr(4750,root,netdata,4750) +%{_libexecdir}/%{name}/plugins.d/ebpf.plugin +%defattr(0644,root,netdata,0644) +%{_libdir}/%{name}/conf.d/ebpf.d.conf +%{_libdir}/%{name}/conf.d/ebpf.d + +%package ebpf-legacy-code +Summary: Compiled eBPF legacy code for the Netdata eBPF plugin +Group: Applications/System +Requires: netdata-plugin-ebpf = %{version} +Conflicts: netdata <= %{version} + +%description ebpf-legacy-code + This package provides the pre-compiled eBPF legacy code for use by the Netdata eBPF plugin. +This code is only needed when using the eBPF plugin with kernel versions before 5.10. 
+ +%files ebpf-legacy-code +%defattr(0640,root,netdata,0640) +%{_libexecdir}/%{name}/plugins.d/ebpf.d/*.o + +%endif + +%package plugin-pythond +Summary: The python.d metrics collection plugin for the Netdata Agent +Group: Applications/System +Requires: netdata = %{version} +Conflicts: netdata <= %{version} +%if 0%{?centos_ver} == 7 || 0%{?centos_ver} == 6 +Requires: python +%else +%if 0%{?centos_ver} == 8 +Requires: python38 +%else +Requires: python3 +%endif +%endif +%if 0%{?centos_ver} != 7 +Suggests: sudo +%endif + +%description plugin-pythond + This plugin adds a selection of additional collectors written in Python to the Netdata Agent. +Many of the collectors provided by this package are also available in netdata-plugin-go. In most cases, you probably +want to use those versions instead of the Python versions. + +%files plugin-pythond +%defattr(0750,root,netdata,0750) +%{_libexecdir}/%{name}/plugins.d/python.d.plugin +%{_libexecdir}/%{name}/python.d +%defattr(0640,root,netdata,0640) +%{_libdir}/%{name}/conf.d/python.d.conf +%{_libdir}/%{name}/conf.d/python.d + +%package plugin-go +Summary: The go.d metrics collection plugin for the Netdata Agent +Group: Applications/System +Requires: netdata = %{version} +Conflicts: netdata <= %{version} +%if 0%{?centos_ver} != 7 +Suggests: nvme-cli +Suggests: sudo +%endif + +%description plugin-go + This plugin adds a selection of additional collectors written in Go to the Netdata Agent. +A significant percentage of the application specific collectors provided by Netdata are part of this plugin, +so most users will want it installed.
+ +%files plugin-go +%defattr(0750,root,netdata,0750) +# CAP_NET_ADMIN needed for WireGuard collector +# CAP_NET_RAW needed for ping collector +%caps(cap_net_admin,cap_net_raw=eip) %{_libexecdir}/%{name}/plugins.d/go.d.plugin +%defattr(0644,root,netdata,0644) +%{_libdir}/%{name}/conf.d/go.d.conf +%{_libdir}/%{name}/conf.d/go.d + +%package plugin-apps +Summary: The per-application metrics collection plugin for the Netdata Agent +Group: Applications/System +Requires: netdata = %{version} +Conflicts: netdata <= %{version} + +%description plugin-apps + This plugin allows the Netdata Agent to collect per-application and per-user metrics without using cgroups. + +%files plugin-apps +%defattr(0750,root,netdata,0750) +# CAP_DAC_READ_SEARCH and CAP_SYS_PTRACE needed for data collection by the plugin. +%caps(cap_dac_read_search,cap_sys_ptrace=ep) %{_libexecdir}/%{name}/plugins.d/apps.plugin +%defattr(0644,root,netdata,0644) +%{_libdir}/%{name}/conf.d/apps_groups.conf + +%package plugin-slabinfo +Summary: The slabinfo metrics collector for the Netdata Agent +Group: Applications/System +Requires: netdata = %{version} +Conflicts: netdata <= %{version} + +%description plugin-slabinfo + This plugin allows the Netdata Agent to collect performance and utilization metrics for the Linux kernel’s SLAB allocator. + +%files plugin-slabinfo +%defattr(0750,root,netdata,0750) +# CAP_DAC_READ_SEARCH needed to access the files the plugin reads to collect data. +%caps(cap_dac_read_search=ep) %{_libexecdir}/%{name}/plugins.d/slabinfo.plugin + +%package plugin-perf +Summary: The perf metrics collector for the Netdata Agent +Group: Applications/System +Requires: netdata = %{version} +Conflicts: netdata <= %{version} + +%description plugin-perf + This plugin allows the Netdata Agent to collect metrics from the Linux perf subsystem.
+ +%files plugin-perf +%defattr(0750,root,netdata,0750) +# Either CAP_SYS_ADMIN or CAP_PERFMON needed for data collection +# PERFMON is newer, so only try to use it on platforms which support it. +%if 0%{?centos_ver} >= 9 || 0%{?fedora} >= 36 +%caps(cap_perfmon=ep) %{_libexecdir}/%{name}/plugins.d/perf.plugin +%else +%caps(cap_sys_admin=ep) %{_libexecdir}/%{name}/plugins.d/perf.plugin +%endif + +%package plugin-debugfs +Summary: The debugfs metrics collector for the Netdata Agent +Group: Applications/System +Requires: netdata = %{version} +Conflicts: netdata <= %{version} + +%description plugin-debugfs + This plugin allows the Netdata Agent to collect Linux kernel metrics exposed through debugfs. + +%files plugin-debugfs +%defattr(0750,root,netdata,0750) +# CAP_DAC_READ_SEARCH required for data collection. +%caps(cap_dac_read_search=ep) %attr(0750,root,netdata) %{_libexecdir}/%{name}/plugins.d/debugfs.plugin + %changelog +* Fri Apr 07 2023 Austin Hemmelgarn 0.0.0-19 +- Split additional plugins out in their own packages. * Tue Mar 21 2023 Austin Hemmelgarn 0.0.0-18 - Fix systemd handling to follow BCP. - Drop pre-systemd init support. @@ -609,8 +858,9 @@ First draft refactor on package dependencies section * Wed Jan 02 2019 Pawel Krupa - 0.0.0-3 - Temporary set version statically - Fix changelog ordering -- Comment-out node.d configuration directory +- Comment-out node.d configuration directory * Wed Jan 02 2019 Pawel Krupa - 0.0.0-2 - Fix permissions for log files * Sun Nov 15 2015 Alon Bar-Lev - 0.0.0-1 - Initial add. 
+ diff --git a/packaging/bundle-ebpf.sh b/packaging/bundle-ebpf.sh index 3204345b0412a8..29fc6a2aa0a4df 100755 --- a/packaging/bundle-ebpf.sh +++ b/packaging/bundle-ebpf.sh @@ -15,5 +15,5 @@ if [ -x "${PLUGINDIR}/ebpf.plugin" ] ; then mkdir "${PLUGINDIR}/ebpf.d" fi # shellcheck disable=SC2046 - cp -a $(find "${SRCDIR}/tmp/ebpf" -mindepth 1 -maxdepth 1) "${PLUGINDIR}/ebpf.d" + cp -r $(find "${SRCDIR}/tmp/ebpf" -mindepth 1 -maxdepth 1) "${PLUGINDIR}/ebpf.d" fi diff --git a/packaging/bundle-libbpf.sh b/packaging/bundle-libbpf.sh index 7e6e22a9edb19d..1c55427657d669 100755 --- a/packaging/bundle-libbpf.sh +++ b/packaging/bundle-libbpf.sh @@ -22,6 +22,6 @@ curl -sSL --connect-timeout 10 --retry 3 "https://github.com/netdata/libbpf/arch sha256sum -c "${1}/packaging/libbpf.checksums" || exit 1 tar -xzf "${LIBBPF_TARBALL}" -C "${1}/externaldeps/libbpf" || exit 1 make -C "${LIBBPF_BUILD_PATH}/src" BUILD_STATIC_ONLY=1 OBJDIR=build/ DESTDIR=../ install || exit 1 -cp -a "${LIBBPF_BUILD_PATH}/usr/${lib_subdir}/libbpf.a" "${1}/externaldeps/libbpf" || exit 1 -cp -a "${LIBBPF_BUILD_PATH}/usr/include" "${1}/externaldeps/libbpf" || exit 1 -cp -a "${LIBBPF_BUILD_PATH}/include/uapi" "${1}/externaldeps/libbpf/include" || exit 1 +cp -r "${LIBBPF_BUILD_PATH}/usr/${lib_subdir}/libbpf.a" "${1}/externaldeps/libbpf" || exit 1 +cp -r "${LIBBPF_BUILD_PATH}/usr/include" "${1}/externaldeps/libbpf" || exit 1 +cp -r "${LIBBPF_BUILD_PATH}/include/uapi" "${1}/externaldeps/libbpf/include" || exit 1 diff --git a/packaging/installer/kickstart.sh b/packaging/installer/kickstart.sh index ea3c6a58d8eb58..4936a40fe4c0a7 100755 --- a/packaging/installer/kickstart.sh +++ b/packaging/installer/kickstart.sh @@ -26,6 +26,7 @@ KICKSTART_SOURCE="$( echo "$(pwd -P)/${self##*/}" )" PACKAGES_SCRIPT="https://raw.githubusercontent.com/netdata/netdata/master/packaging/installer/install-required-packages.sh" +DEFAULT_PLUGIN_PACKAGES="netdata-plugin-go netdata-plugin-python netdata-plugin-apps netdata-plugin-ebpf" 
PATH="${PATH}:/usr/local/bin:/usr/local/sbin" PUBLIC_CLOUD_URL="https://app.netdata.cloud" REPOCONFIG_DEB_URL_PREFIX="https://repo.netdata.cloud/repos/repoconfig" @@ -1398,6 +1399,9 @@ try_package_install() { common_rpm_opts common_dnf_opts repo_prefix="el/${SYSVERSION}" + if [ "${SYSVERSION}" -lt 8 ]; then + explicitly_install_native_plugins=1 + fi ;; fedora|ol) common_rpm_opts @@ -1533,6 +1537,14 @@ try_package_install() { fi return 2 fi + + if [ -n "${explicitly_install_native_plugins}" ]; then + progress "Installing external plugins." + # shellcheck disable=SC2086 + if ! run_as_root env ${env} ${pm_cmd} install ${DEFAULT_PLUGIN_PACKAGES}; then + warning "Failed to install external plugin packages. Some collectors may not be available." + fi + fi } # ====================================================================== From f60c9e0953d235104a7d152628f98bf2a568f85b Mon Sep 17 00:00:00 2001 From: "Austin S. Hemmelgarn" Date: Tue, 30 May 2023 12:54:13 -0400 Subject: [PATCH 21/23] Improve some of the error messages in the kickstart script. (#15061) --- packaging/installer/kickstart.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packaging/installer/kickstart.sh b/packaging/installer/kickstart.sh index 4936a40fe4c0a7..689fc1efc8d564 100755 --- a/packaging/installer/kickstart.sh +++ b/packaging/installer/kickstart.sh @@ -719,7 +719,7 @@ confirm_root_support() { fi if [ -z "${ROOTCMD}" ]; then - fatal "We need root privileges to continue, but cannot find a way to gain them (we support sudo, doas, and pkexec). Either re-run this script as root, or set \$ROOTCMD to a command that can be used to gain root privileges." F0201 + fatal "This script needs root privileges to install Netdata, but cannot find a way to gain them (we support sudo, doas, and pkexec). Either re-run this script as root, or set \$ROOTCMD to a command that can be used to gain root privileges." 
F0201 fi fi } @@ -1344,7 +1344,7 @@ common_dnf_opts() { } try_package_install() { - failed_refresh_msg="Failed to refresh repository metadata. ${BADNET_MSG} or by misconfiguration of one or more rpackage repositories in the system package manager configuration." + failed_refresh_msg="Failed to refresh repository metadata. ${BADNET_MSG} or incompatibilities with one or more third-party package repositories in the system package manager configuration." if [ -z "${DISTRO_COMPAT_NAME}" ] || [ "${DISTRO_COMPAT_NAME}" = "unknown" ]; then warning "Unable to determine Linux distribution for native packages." @@ -1487,7 +1487,7 @@ try_package_install() { if [ -n "${repo_subcmd}" ]; then # shellcheck disable=SC2086 if ! run_as_root env ${env} ${pm_cmd} ${repo_subcmd} ${repo_update_opts}; then - fatal "${failed_refresh_msg}" F0205 + fatal "${failed_refresh_msg} In most cases, disabling any third-party repositories on the system and re-running the installer with the same options should work. If that does not work, consider using a static build with the --static-only option instead of native packages." 
F0205 fi fi else From d3dc461f0ee504dc41cd7f0370690363aa7694ef Mon Sep 17 00:00:00 2001 From: Hugo Valente <82235632+hugovalente-pm@users.noreply.github.com> Date: Tue, 30 May 2023 18:52:54 +0100 Subject: [PATCH 22/23] initial draft for the silencing docs (#15112) * initial draft for the silencing docs * minor fixes upon local review --- ...nage-alert-notification-silencing-rules.md | 58 +++++++++++++++++++ .../manage-notification-methods.md | 5 +- .../alerts-notifications/notifications.md | 42 +++++++++++--- docs/cloud/manage/plans.md | 8 ++- docs/cloud/manage/role-based-access.md | 7 +++ 5 files changed, 109 insertions(+), 11 deletions(-) create mode 100644 docs/cloud/alerts-notifications/manage-alert-notification-silencing-rules.md diff --git a/docs/cloud/alerts-notifications/manage-alert-notification-silencing-rules.md b/docs/cloud/alerts-notifications/manage-alert-notification-silencing-rules.md new file mode 100644 index 00000000000000..05220ad82983c4 --- /dev/null +++ b/docs/cloud/alerts-notifications/manage-alert-notification-silencing-rules.md @@ -0,0 +1,58 @@ +# Manage alert notification silencing rules + +From the Cloud interface, you can manage your space's alert notification silencing rules settings as well as allow users to define their personal ones. + +## Prerequisites + +To manage **space's alert notification silencing rule settings**, you will need the following: + +- A Netdata Cloud account +- Access to the space as an **administrator** or **manager** (**troubleshooters** can only view space rules) + + +To manage your **personal alert notification silencing rule settings**, you will need the following: + +- A Netdata Cloud account +- Access to the space with any roles except **billing** + +### Steps + +1. Click on the **Space settings** cog (located above your profile icon) +1. Click on the **Alert & Notification** tab on the left hand-side +1. Click on the **Notification Silencing Rules** tab +1. 
You will be presented with a table of the configured alert notification silencing rules for: + * the space (if you aren't an **observer**) + * yourself + + You will be able to: + 1. **Add a new** alert notification silencing rule configuration. + - Choose if it applies to **All users** or **Myself** (All users is only available for **administrators** and **managers**) + - You need to provide a name for the configuration so you can easily refer to it + - Define criteria for Nodes: To which Rooms will this apply? What Nodes? Does it apply to host labels key-value pairs? + - Define criteria for Alerts: Which alert name is being targeted? What alert context? Will it apply to a specific alert role? + - Define when it will be applied: + - Immediately, from now until it is turned off or for a specific duration (start and end date automatically set) + - Scheduled, you specify the start and end time for when the rule becomes active and then inactive (time is set according to your browser local timezone) + Note: You are only able to add a rule if your space is on a [paid plan](https://github.com/netdata/netdata/blob/master/docs/cloud/manage/plans.md). + 1. **Edit an existing** alert notification silencing rule configuration. You will be able to change: + - The name provided for it + - Who it applies to + - Selection criteria for Nodes and Alerts + - When it will be applied + 1. **Enable/Disable** a given alert notification silencing rule configuration. + - Use the toggle to enable or disable + 1. **Delete an existing** alert notification silencing rule. + - Use the trash icon to delete your configuration + +## Silencing rules examples + +| Rule name | War Rooms | Nodes | Host Label | Alert name | Alert context | Alert role | Description | +| :-- | :-- | :-- | :-- | :-- | :-- | :-- | :--| +| Space silencing | All Rooms | * | * | * | * | * | This rule silences the entire space, targets all nodes and for all users. E.g. infrastructure wide maintenance window.
| +| DB Servers Rooms | PostgreSQL Servers | * | * | * | * | * | This rule silences the nodes in the room named PostgreSQL Servers, for example it doesn't silence the `All Nodes` room. E.g. My team with membership to this room doesn't want to receive notifications for these nodes. | +| Node child1 | All Rooms | `child1` | * | * | * | * | This rule silences all alert state transitions for node `child1` on all rooms and for all users. E.g. node could be going under maintenance. | +| Production nodes | All Rooms | * | `environment:production` | * | * | * | This rule silences all alert state transitions for nodes with the host label key-value pair `environment:production`. E.g. Maintenance window on nodes with specific host labels. | +| Third party maintenance | All Rooms | * | * | `httpcheck_posthog_netdata_cloud.request_status` | * | * | This rule silences this specific alert since a third-party partner will be undergoing maintenance. | +| Intended stress usage on CPU | All Rooms | * | * | * | `system.cpu` | * | This rule silences specific alerts across all nodes and their CPU cores. | +| Silence role webmaster | All Rooms | * | * | * | * | `webmaster` | This rule silences all alerts configured with the role `webmaster`. | +| Silence alert on node | All Rooms | `child1` | * | `httpcheck_posthog_netdata_cloud.request_status` | * | * | This rule silences the specific alert on the `child1` node. | diff --git a/docs/cloud/alerts-notifications/manage-notification-methods.md b/docs/cloud/alerts-notifications/manage-notification-methods.md index 17c7f879af5f4a..f61b6bf6f7e988 100644 --- a/docs/cloud/alerts-notifications/manage-notification-methods.md +++ b/docs/cloud/alerts-notifications/manage-notification-methods.md @@ -27,7 +27,8 @@ Notes: ### Steps 1. Click on the **Space settings** cog (located above your profile icon) -1. Click on the **Notification** tab +1. Click on the **Alerts & Notification** tab on the left hand-side +1.
Click on the **Notification Methods** tab 1. You will be presented with a table of the configured notification methods for the space. You will be able to: 1. **Add a new** notification method configuration. - Choose the service from the list of the available ones, you'll may see a list of unavailable options if your plan doesn't allow some of them (you will see on the @@ -42,7 +43,7 @@ Notes: - Service specific inputs 1. **Enable/Disable** a given notification method configuration. - Use the toggle to enable or disable the notification method configuration - 1. **Delete an existing** notification method configuartion. Netdata provided ones can't be deleted, e.g. Email + 1. **Delete an existing** notification method configuration. Netdata provided ones can't be deleted, e.g. Email - Use the trash icon to delete your configuration ## Manage user notification settings diff --git a/docs/cloud/alerts-notifications/notifications.md b/docs/cloud/alerts-notifications/notifications.md index 94cd2dc3fe0d8b..ad115d43f40f93 100644 --- a/docs/cloud/alerts-notifications/notifications.md +++ b/docs/cloud/alerts-notifications/notifications.md @@ -31,7 +31,7 @@ or add new alert that you see in Netdata Cloud, and receive via centralized aler -### Alert notifications +## Alert notifications Netdata Cloud can send centralized alert notifications to your team whenever a node enters a warning, critical, or unreachable state. By enabling notifications, you ensure no alert, on any node in your infrastructure, goes unnoticed by you or your team. @@ -51,9 +51,9 @@ All users in a Space can personalize their notifications settings, for Personal > ⚠️ Netdata Cloud supports different notification methods and their availability will depend on the plan you are at. > For more details check [Service classification](#service-classification) or [netdata.cloud/pricing](https://www.netdata.cloud/pricing). 
-#### Service level +### Service level -##### Personal +#### Personal The notifications methods classified as **Personal** are what we consider generic, meaning that these can't have specific rules for them set by the administrators. @@ -63,7 +63,7 @@ manage what specific configurations they want for the Space / Room(s) and the de One example of such a notification method is the E-mail. -##### System +#### System For **System** notification methods, the destination of the channel will be a target that usually isn't specific to a single user, e.g. slack channel. @@ -72,23 +72,49 @@ different targets depending on Rooms or Notification level settings. Some examples of such notification methods are: Webhook, PagerDuty, Slack. -#### Service classification +### Service classification -##### Community +#### Community Notification methods classified as Community can be used by everyone independent on the plan your space is at. These are: Email and discord -##### Pro +#### Pro Notification methods classified as Pro are only available for **Pro** and **Business** plans These are: webhook -##### Business +#### Business Notification methods classified as Business are only available for **Business** plans These are: PagerDuty, Slack, Opsgenie +## Silencing Alert notifications + +Netdata Cloud provides you a Silencing Rule engine which allows you to mute alert notifications. This muting action is specific to alert state transition notifications, it doesn't include node unreachable state transitions. + +The Silencing Rule engine is flexible and allows you to enter silence rules for the two main entities involved on alert notifications and can be set using different attributes. 
The main entities you can enter are **Nodes** and **Alerts** which can be used in combination or isolation to target specific needs - see some examples [here](https://github.com/netdata/netdata/blob/master/docs/cloud/alerts-notifications/manage-alert-notification-silencing-rules.md#silencing-rules-examples). + +### Scope definition for Nodes +* **Space:** silencing the space, selecting `All Rooms`, silences all alert state transitions from any node claimed to the space. +* **War Room:** silencing a specific room will silence all alert state transitions from any node in that room. Please note if the node belongs to +another room which isn't silenced it can trigger alert notifications to the users with membership to that other room. +* **Node:** silencing a specific node can be done for the entire space, selecting `All Rooms`, or for specific war room(s). The main difference is +if the node should be silenced for the entire space or just for specific rooms (when specific rooms are selected only users with membership to that room won't receive notifications). + +### Scope definition for Alerts +* **Alert name:** silencing a specific alert name silences all alert state transitions for that specific alert. +* **Alert context:** silencing a specific alert context will silence all alert state transitions for alerts targeting that chart context, for more details check [alert configuration docs](https://github.com/netdata/netdata/blob/master/health/REFERENCE.md#alarm-line-on). +* **Alert role:** silencing a specific alert role will silence all the alert state transitions for alerts that are configured to be specific role recipients, for more details check [alert configuration docs](https://github.com/netdata/netdata/blob/master/health/REFERENCE.md#alarm-line-to). + +Besides the above two main entities, there are two other important settings that you can define on a silencing rule: +* Who does the rule affect? **All users** in the space or **Myself** +* When does it apply?
**Immediately** or on a **Schedule** (when setting immediately you can set duration) + +For further help on setting alert notification silencing rules go to [Manage Alert Notification Silencing Rules](https://github.com/netdata/netdata/blob/master/docs/cloud/alerts-notifications/manage-alert-notification-silencing-rules.md). + +> ⚠️ This feature is only available for [Netdata paid plans](https://github.com/netdata/netdata/blob/master/docs/cloud/manage/plans.md). + ## Flood protection If a node has too many state changes like firing too many alerts or going from reachable to unreachable, Netdata Cloud diff --git a/docs/cloud/manage/plans.md b/docs/cloud/manage/plans.md index 6b4432f9ebb8a3..14a55d5dbb4c24 100644 --- a/docs/cloud/manage/plans.md +++ b/docs/cloud/manage/plans.md @@ -101,7 +101,13 @@ The plan on your space will determine what type of notifications methods will be * **Pro** - Email, Discord and webhook * **Business** - Unlimited, this includes Slack, PagerDuty, Opsgenie etc. -For mode details check the documentation under [Alert Notifications](https://github.com/netdata/netdata/blob/master/docs/cloud/alerts-notifications/notifications.md). +For more details check the documentation under [Alert Notifications](https://github.com/netdata/netdata/blob/master/docs/cloud/alerts-notifications/notifications.md#alert-notifications). + +##### Alert notification silencing rules + +The plan on your space will determine if you are able to add alert notification silencing rules since this feature will only be available for paid plans: **Pro** or **Business**. + +For more details check the documentation under [Alert Notifications](https://github.com/netdata/netdata/blob/master/docs/cloud/alerts-notifications/notifications.md#silencing-alert-notifications).
### Related Topics diff --git a/docs/cloud/manage/role-based-access.md b/docs/cloud/manage/role-based-access.md index 1696e0964d598e..a0b387749d5606 100644 --- a/docs/cloud/manage/role-based-access.md +++ b/docs/cloud/manage/role-based-access.md @@ -84,6 +84,13 @@ In more detail, you can find on the following tables which functionalities are a | Edit configuration | :heavy_check_mark: | - | - | - | - | - | Some exceptions apply depending on [service level](https://github.com/netdata/netdata/blob/master/docs/cloud/alerts-notifications/manage-notification-methods.md#available-actions-per-notification-methods-based-on-service-level) | | Delete configuration | :heavy_check_mark: | - | - | - | - | - | | | Edit personal level notification settings | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | [Manage user notification settings](https://github.com/netdata/netdata/blob/master/docs/cloud/alerts-notifications/manage-notification-methods.md#manage-user-notification-settings) | +| See space alert notification silencing rules | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | - | - | - | | +| Add new space alert notification silencing rule | :heavy_check_mark: | :heavy_check_mark: | - | - | - | - | | +| Enable/Disable space alert notification silencing rule | :heavy_check_mark: | :heavy_check_mark: | - | - | - | - | | +| Edit space alert notification silencing rule | :heavy_check_mark: | :heavy_check_mark: | - | - | - | - | | +| Delete space alert notification silencing rule | :heavy_check_mark: | :heavy_check_mark: | - | - | - | - | | +| See, add, edit or delete personal level alert notification silencing rule | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | - | - | | + Notes: * Enable, Edit and Add actions over specific notification methods will only be allowed if your plan has access to those ([service 
classification](https://github.com/netdata/netdata/blob/master/docs/cloud/alerts-notifications/notifications.md#service-classification)) From 8a6f6999fe991196357f66a9edde10c6d746e084 Mon Sep 17 00:00:00 2001 From: "Austin S. Hemmelgarn" Date: Tue, 30 May 2023 14:47:15 -0400 Subject: [PATCH 23/23] Fix handling of eBPF plugin for DEB packages. (#15117) --- contrib/debian/control | 4 ++-- contrib/debian/rules | 27 ++++++++++++++------------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/contrib/debian/control b/contrib/debian/control index f4a767b3207896..1c1e10b14fe16d 100644 --- a/contrib/debian/control +++ b/contrib/debian/control @@ -99,7 +99,7 @@ Description: The charts.d metrics collection plugin for the Netdata Agent LibreSWAN, OpenSIPS, and Wireless access point statistics. Package: netdata-plugin-ebpf -Architecture: any +Architecture: amd64 Depends: ${shlibs:Depends}, netdata (>= ${source:Version}) Recommends: netdata-ebpf-code-legacy (>= ${source:Version}), @@ -110,7 +110,7 @@ Description: The eBPF metrics collection plugin for the Netdata Agent detailed kernel-level metrics for the system. 
Package: netdata-ebpf-code-legacy -Architecture: i386 amd64 +Architecture: amd64 Depends: netdata-plugin-ebpf (= ${source:Version}) Conflicts: netdata (< ${source:Version}) Description: Compiled eBPF legacy code for the Netdata eBPF plugin diff --git a/contrib/debian/rules b/contrib/debian/rules index 626b5a69ff4e1e..bf80870dec1c63 100755 --- a/contrib/debian/rules +++ b/contrib/debian/rules @@ -15,7 +15,7 @@ else SYSTEMD_UNIT = system/systemd/netdata.service endif -ifeq ($(shell test `uname -m` != "x86_64" && echo "1"), 1) +ifeq ($(shell test ${DEB_TARGET_ARCH} != "amd64" && echo "1"), 1) HAVE_EBPF = 0 EBPF_CONFIG = --disable-ebpf else @@ -82,14 +82,13 @@ override_dh_install: $(TOP)-plugin-chartsd/usr/lib/netdata/conf.d/charts.d # Add ebpf plugin install rules - [ $(HAVE_EBPF) -eq 1 ] && mkdir -p $(TOP)-plugin-ebpf/usr/libexec/netdata/plugins.d/ - [ $(HAVE_EBPF) -eq 1 ] && mkdir -p $(TOP)-plugin-ebpf/usr/lib/netdata/conf.d/ - [ $(HAVE_EBPF) -eq 1 ] && mv -f $(TEMPTOP)/usr/libexec/netdata/plugins.d/ebpf.plugin \ - $(TOP)-plugin-ebpf/usr/libexec/netdata/plugins.d/ebpf.plugin - [ $(HAVE_EBPF) -eq 1 ] && mv -f $(TEMPTOP)/usr/lib/netdata/conf.d/ebpf.d.conf \ - $(TOP)-plugin-ebpf/usr/lib/netdata/conf.d/ebpf.d.conf - [ $(HAVE_EBPF) -eq 1 ] && mv -f $(TEMPTOP)/usr/lib/netdata/conf.d/ebpf.d \ - $(TOP)-plugin-ebpf/usr/lib/netdata/conf.d/ebpf.d + if [ $(HAVE_EBPF) -eq 1 ]; then \ + mkdir -p $(TOP)-plugin-ebpf/usr/libexec/netdata/plugins.d/; \ + mkdir -p $(TOP)-plugin-ebpf/usr/lib/netdata/conf.d/; \ + mv -f $(TEMPTOP)/usr/libexec/netdata/plugins.d/ebpf.plugin $(TOP)-plugin-ebpf/usr/libexec/netdata/plugins.d/ebpf.plugin; \ + mv -f $(TEMPTOP)/usr/lib/netdata/conf.d/ebpf.d.conf $(TOP)-plugin-ebpf/usr/lib/netdata/conf.d/ebpf.d.conf; \ + mv -f $(TEMPTOP)/usr/lib/netdata/conf.d/ebpf.d $(TOP)-plugin-ebpf/usr/lib/netdata/conf.d/ebpf.d; \ + fi # Add python plugin install rules mkdir -p $(TOP)-plugin-pythond/usr/libexec/netdata/plugins.d/ @@ -145,8 +144,10 @@ override_dh_install: # 
Handle eBPF code # - [ $(HAVE_EBPF) -eq 1 ] && mkdir -p $(TOP)-plugin-ebpf-code/usr/libexec/netdata/plugins.d/ - [ $(HAVE_EBPF) -eq 1 ] && packaging/bundle-ebpf.sh . ${TOP}-plugin-ebpf-code/usr/libexec/netdata/plugins.d/ + if [ $(HAVE_EBPF) -eq 1 ]; then \ + mkdir -p $(TOP)-ebpf-code-legacy/usr/libexec/netdata/plugins.d/; \ + packaging/bundle-ebpf.sh . ${TOP}-ebpf-code-legacy/usr/libexec/netdata/plugins.d/; \ + fi # Install go to it's own package directory # @@ -215,5 +216,5 @@ override_dh_clean: # Tidy up copied/generated files # - -[ -r $(CURDIR)/debian/netdata.logrotate ] && rm $(CURDIR)/debian/netdata.logrotate - -[ -r $(CURDIR)/debian/netdata.conffiles ] && rm $(CURDIR)/debian/netdata.conffiles + [ -r $(CURDIR)/debian/netdata.logrotate ] && rm $(CURDIR)/debian/netdata.logrotate ; true + [ -r $(CURDIR)/debian/netdata.conffiles ] && rm $(CURDIR)/debian/netdata.conffiles ; true