Skip to content

Commit

Permalink
Update with KubeDB configuration (#1)
Browse files Browse the repository at this point in the history
Signed-off-by: Neaj Morshad <[email protected]>
  • Loading branch information
Neaj-Morshad-101 authored Aug 14, 2024
1 parent 03f49c6 commit 72f68e3
Show file tree
Hide file tree
Showing 3 changed files with 260 additions and 4 deletions.
11 changes: 7 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,20 @@ ARG ARCH="amd64"
ARG OS="linux"
FROM quay.io/prometheus/golang-builder AS builder

# Get sql_exporter
ADD . /go/src/github.com/burningalchemist/sql_exporter
WORKDIR /go/src/github.com/burningalchemist/sql_exporter
# Get sql_exporter fork
ADD . /go/src/github.com/kubedb/mssqlserver_exporter
WORKDIR /go/src/github.com/kubedb/mssqlserver_exporter

# Do makefile
RUN make

# Make image and copy build sql_exporter
FROM quay.io/prometheus/busybox-${OS}-${ARCH}:latest
LABEL maintainer="The Prometheus Authors <[email protected]>"
COPY --from=builder /go/src/github.com/burningalchemist/sql_exporter/sql_exporter /bin/sql_exporter
COPY --from=builder /go/src/github.com/kubedb/mssqlserver_exporter/sql_exporter /bin/sql_exporter

# Add kubedb configuration files to /etc/sql-exporter/
ADD kubedb /etc/sql-exporter/

EXPOSE 9399
USER nobody
Expand Down
207 changes: 207 additions & 0 deletions kubedb/mssql_standard.collector.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
# A collector defining standard metrics for Microsoft SQL Server.
#
# It is required that the SQL Server user has the following permissions:
#
# GRANT VIEW ANY DEFINITION TO
# GRANT VIEW SERVER STATE TO
#
collector_name: mssql_standard

# Similar to global.min_interval, but applies to the queries defined by this collector only.
#min_interval: 0s

metrics:
- metric_name: mssql_local_time_seconds
type: gauge
help: 'Local time in seconds since epoch (Unix time).'
values: [unix_time]
query: |
SELECT DATEDIFF(second, '19700101', GETUTCDATE()) AS unix_time
- metric_name: mssql_connections
type: gauge
help: 'Number of active connections.'
key_labels:
- db
values: [count]
query: |
SELECT DB_NAME(sp.dbid) AS db, COUNT(sp.spid) AS count
FROM sys.sysprocesses sp
GROUP BY DB_NAME(sp.dbid)
#
# Collected from sys.dm_os_performance_counters
#
- metric_name: mssql_deadlocks
type: counter
help: 'Number of lock requests that resulted in a deadlock.'
values: [cntr_value]
query: |
SELECT cntr_value
FROM sys.dm_os_performance_counters WITH (NOLOCK)
WHERE counter_name = 'Number of Deadlocks/sec' AND instance_name = '_Total'
- metric_name: mssql_user_errors
type: counter
help: 'Number of user errors.'
values: [cntr_value]
query: |
SELECT cntr_value
FROM sys.dm_os_performance_counters WITH (NOLOCK)
WHERE counter_name = 'Errors/sec' AND instance_name = 'User Errors'
- metric_name: mssql_kill_connection_errors
type: counter
help: 'Number of severe errors that caused SQL Server to kill the connection.'
values: [cntr_value]
query: |
SELECT cntr_value
FROM sys.dm_os_performance_counters WITH (NOLOCK)
WHERE counter_name = 'Errors/sec' AND instance_name = 'Kill Connection Errors'
- metric_name: mssql_page_life_expectancy_seconds
type: gauge
help: 'The minimum number of seconds a page will stay in the buffer pool on this node without references.'
values: [cntr_value]
query: |
SELECT top(1) cntr_value
FROM sys.dm_os_performance_counters WITH (NOLOCK)
WHERE counter_name = 'Page life expectancy'
- metric_name: mssql_batch_requests
type: counter
help: 'Number of command batches received.'
values: [cntr_value]
query: |
SELECT cntr_value
FROM sys.dm_os_performance_counters WITH (NOLOCK)
WHERE counter_name = 'Batch Requests/sec'
- metric_name: mssql_log_growths
type: counter
help: 'Number of times the transaction log has been expanded, per database.'
key_labels:
- db
values: [cntr_value]
query: |
SELECT rtrim(instance_name) AS db, cntr_value
FROM sys.dm_os_performance_counters WITH (NOLOCK)
WHERE counter_name = 'Log Growths' AND instance_name <> '_Total'
- metric_name: mssql_buffer_cache_hit_ratio
type: gauge
help: 'Ratio of requests that hit the buffer cache'
values: [cntr_value]
query: |
SELECT cntr_value
FROM sys.dm_os_performance_counters
WHERE [counter_name] = 'Buffer cache hit ratio'
- metric_name: mssql_checkpoint_pages_sec
type: gauge
help: 'Checkpoint Pages Per Second'
values: [cntr_value]
query: |
SELECT cntr_value
FROM sys.dm_os_performance_counters
WHERE [counter_name] = 'Checkpoint pages/sec'
#
# Collected from sys.dm_io_virtual_file_stats
#
- metric_name: mssql_io_stall_seconds
type: counter
help: 'Stall time in seconds per database and I/O operation.'
key_labels:
- db
value_label: operation
values:
- read
- write
query_ref: mssql_io_stall
- metric_name: mssql_io_stall_total_seconds
type: counter
help: 'Total stall time in seconds per database.'
key_labels:
- db
values:
- io_stall
query_ref: mssql_io_stall

#
# Collected from sys.dm_os_process_memory
#
- metric_name: mssql_resident_memory_bytes
type: gauge
help: 'SQL Server resident memory size (AKA working set).'
values: [resident_memory_bytes]
query_ref: mssql_process_memory

- metric_name: mssql_virtual_memory_bytes
type: gauge
help: 'SQL Server committed virtual memory size.'
values: [virtual_memory_bytes]
query_ref: mssql_process_memory

- metric_name: mssql_memory_utilization_percentage
type: gauge
help: 'The percentage of committed memory that is in the working set.'
values: [memory_utilization_percentage]
query_ref: mssql_process_memory

- metric_name: mssql_page_fault_count
type: counter
help: 'The number of page faults that were incurred by the SQL Server process.'
values: [page_fault_count]
query_ref: mssql_process_memory

#
# Collected from sys.dm_os_sys_memory
#
- metric_name: mssql_os_memory
type: gauge
help: 'OS physical memory, used and available.'
value_label: 'state'
values: [used, available]
query: |
SELECT
(total_physical_memory_kb - available_physical_memory_kb) * 1024 AS used,
available_physical_memory_kb * 1024 AS available
FROM sys.dm_os_sys_memory
- metric_name: mssql_os_page_file
type: gauge
help: 'OS page file, used and available.'
value_label: 'state'
values: [used, available]
query: |
SELECT
(total_page_file_kb - available_page_file_kb) * 1024 AS used,
available_page_file_kb * 1024 AS available
FROM sys.dm_os_sys_memory
queries:
# Populates `mssql_io_stall` and `mssql_io_stall_total`
- query_name: mssql_io_stall
query: |
SELECT
cast(DB_Name(a.database_id) as varchar) AS [db],
sum(io_stall_read_ms) / 1000.0 AS [read],
sum(io_stall_write_ms) / 1000.0 AS [write],
sum(io_stall) / 1000.0 AS io_stall
FROM
sys.dm_io_virtual_file_stats(null, null) a
INNER JOIN sys.master_files b ON a.database_id = b.database_id AND a.file_id = b.file_id
GROUP BY a.database_id
# Populates `mssql_resident_memory_bytes`, `mssql_virtual_memory_bytes`, `mssql_memory_utilization_percentage` and
# `mssql_page_fault_count`.
- query_name: mssql_process_memory
query: |
SELECT
physical_memory_in_use_kb * 1024 AS resident_memory_bytes,
virtual_address_space_committed_kb * 1024 AS virtual_memory_bytes,
memory_utilization_percentage,
page_fault_count
FROM sys.dm_os_process_memory
46 changes: 46 additions & 0 deletions kubedb/sql_exporter.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Global settings and defaults.
global:
scrape_timeout: 10s
scrape_timeout_offset: 500ms
min_interval: 0s
max_connections: 3
max_idle_connections: 3

# The target to monitor and the collectors to execute on it.
target:
name: mssql_database
data_source_name: 'sqlserver://prom_user:[email protected]:1433/master'
# Example for KubeDB
# data_source_name: 'sqlserver://username:password@<pod_name>.<gov_svc_name>.<namespace>.svc.cluster.local:1433/master'
collectors: [mssql_*]
enable_ping: true

# Collectors and metrics definitions
collectors:
- collector_name: mssql_sample
metrics:
- metric_name: mssql_log_growths
type: counter
help: 'Total number of times the transaction log has been expanded since last restart, per database.'
key_labels:
- db
static_labels:
env: dev
region: europe
values: [counter]
query: |
SELECT rtrim(instance_name) AS db, cntr_value AS counter
FROM sys.dm_os_performance_counters
WHERE counter_name = 'Log Growths' AND instance_name <> '_Total'
- metric_name: mssql_hostname
type: gauge
help: 'Database server hostname'
key_labels:
- hostname
static_value: 1
query: |
SELECT @@SERVERNAME AS hostname
collector_files:
- "*.collector.yml"

0 comments on commit 72f68e3

Please sign in to comment.