Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add KubeDB specific configuration files #572

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion documentation/sql_exporter.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ target:
name: mssql_database
# Data source name always has a URI schema that matches the driver name. In some cases (e.g. MySQL)
# the schema gets dropped or replaced to match the driver expected DSN format.
data_source_name: 'sqlserver://prom_user:prom_password@dbserver1.example.com:1433'
data_source_name: 'sqlserver://prom_user:prom_password@dbserver1.example.com:1433/master'

# Collectors (referenced by name) to execute on the target.
collectors: [mssql_standard]
Expand Down
207 changes: 207 additions & 0 deletions kubedb/mssql_standard.collector.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
# A collector defining standard metrics for Microsoft SQL Server.
#
# It is required that the SQL Server user has the following permissions:
#
#   GRANT VIEW ANY DEFINITION TO <user>
#   GRANT VIEW SERVER STATE TO <user>
#
# Name under which this collector is referenced from a target's `collectors` list.
collector_name: mssql_standard

# Similar to global.min_interval, but applies to the queries defined by this collector only.
# Left commented out, so no collector-specific interval is set here.
#min_interval: 0s

# Metric definitions. Each entry either embeds its own `query` or points at a
# shared entry of the `queries` list through `query_ref`.
metrics:
# Server clock expressed as seconds elapsed since 1970-01-01, computed from
# the UTC date returned by GETUTCDATE().
- metric_name: mssql_local_time_seconds
  type: gauge
  help: 'Local time in seconds since epoch (Unix time).'
  values:
    - unix_time
  query: |
    SELECT DATEDIFF(second, '19700101', GETUTCDATE()) AS unix_time

# One sample per database: the number of rows in sys.sysprocesses grouped by
# the database name, exposed through the `db` label.
- metric_name: mssql_connections
  type: gauge
  help: 'Number of active connections.'
  key_labels: [db]
  values:
    - count
  query: |
    SELECT DB_NAME(sp.dbid) AS db, COUNT(sp.spid) AS count
    FROM sys.sysprocesses sp
    GROUP BY DB_NAME(sp.dbid)

#
# Collected from sys.dm_os_performance_counters
#
# Reads the '_Total' instance of the 'Number of Deadlocks/sec' performance
# counter and exposes its raw value as a counter.
- metric_name: mssql_deadlocks
  type: counter
  help: 'Number of lock requests that resulted in a deadlock.'
  values:
    - cntr_value
  query: |
    SELECT cntr_value
    FROM sys.dm_os_performance_counters WITH (NOLOCK)
    WHERE counter_name = 'Number of Deadlocks/sec' AND instance_name = '_Total'

# Reads the 'User Errors' instance of the 'Errors/sec' performance counter.
- metric_name: mssql_user_errors
  type: counter
  help: 'Number of user errors.'
  values:
    - cntr_value
  query: |
    SELECT cntr_value
    FROM sys.dm_os_performance_counters WITH (NOLOCK)
    WHERE counter_name = 'Errors/sec' AND instance_name = 'User Errors'

# Reads the 'Kill Connection Errors' instance of the 'Errors/sec' performance
# counter.
- metric_name: mssql_kill_connection_errors
  type: counter
  help: 'Number of severe errors that caused SQL Server to kill the connection.'
  values:
    - cntr_value
  query: |
    SELECT cntr_value
    FROM sys.dm_os_performance_counters WITH (NOLOCK)
    WHERE counter_name = 'Errors/sec' AND instance_name = 'Kill Connection Errors'

# Takes a single 'Page life expectancy' row from the performance counters.
# NOTE(review): TOP(1) has no ORDER BY, so if several rows carry this counter
# name the row that is returned is not guaranteed — confirm this is intended.
- metric_name: mssql_page_life_expectancy_seconds
  type: gauge
  help: 'The minimum number of seconds a page will stay in the buffer pool on this node without references.'
  values:
    - cntr_value
  query: |
    SELECT top(1) cntr_value
    FROM sys.dm_os_performance_counters WITH (NOLOCK)
    WHERE counter_name = 'Page life expectancy'

# Raw value of the 'Batch Requests/sec' performance counter, exposed as a
# counter.
- metric_name: mssql_batch_requests
  type: counter
  help: 'Number of command batches received.'
  values:
    - cntr_value
  query: |
    SELECT cntr_value
    FROM sys.dm_os_performance_counters WITH (NOLOCK)
    WHERE counter_name = 'Batch Requests/sec'

# One sample per 'Log Growths' counter instance, skipping the '_Total'
# aggregate; the right-trimmed instance name becomes the `db` label.
- metric_name: mssql_log_growths
  type: counter
  help: 'Number of times the transaction log has been expanded, per database.'
  key_labels: [db]
  values:
    - cntr_value
  query: |
    SELECT rtrim(instance_name) AS db, cntr_value
    FROM sys.dm_os_performance_counters WITH (NOLOCK)
    WHERE counter_name = 'Log Growths' AND instance_name <> '_Total'

# Buffer cache hit ratio as a fraction in the range 0..1.
#
# 'Buffer cache hit ratio' is a fraction-type performance counter: its raw
# cntr_value is only the numerator and is meaningless on its own. It has to be
# divided by the matching 'Buffer cache hit ratio base' counter of the same
# object. A zero base (no lookups recorded yet) is reported as 0 rather than
# producing a division error or a NULL sample.
- metric_name: mssql_buffer_cache_hit_ratio
  type: gauge
  help: 'Ratio of requests that hit the buffer cache'
  values: [cntr_value]
  query: |
    SELECT
      CASE
        WHEN b.cntr_value > 0 THEN CAST(r.cntr_value AS float) / b.cntr_value
        ELSE 0
      END AS cntr_value
    FROM sys.dm_os_performance_counters r WITH (NOLOCK)
    INNER JOIN sys.dm_os_performance_counters b WITH (NOLOCK)
      ON b.object_name = r.object_name
    WHERE r.counter_name = 'Buffer cache hit ratio'
      AND b.counter_name = 'Buffer cache hit ratio base'

# Pages flushed by checkpoints.
#
# Despite the '/sec' suffix, the value stored in sys.dm_os_performance_counters
# for this counter is cumulative, so it is exposed as a `counter` — the same
# treatment the other '/sec' counters in this file (deadlocks, batch requests)
# receive. Apply rate() on the Prometheus side to obtain pages per second.
- metric_name: mssql_checkpoint_pages_sec
  type: counter
  help: 'Cumulative number of pages flushed to disk by checkpoints (use rate() for pages per second).'
  values: [cntr_value]
  query: |
    SELECT cntr_value
    FROM sys.dm_os_performance_counters WITH (NOLOCK)
    WHERE [counter_name] = 'Checkpoint pages/sec'

#
# Collected from sys.dm_io_virtual_file_stats
#
# Built from the shared `mssql_io_stall` query: its `read` and `write` columns
# are both exported under this metric, told apart by the `operation` label.
- metric_name: mssql_io_stall_seconds
  type: counter
  help: 'Stall time in seconds per database and I/O operation.'
  key_labels: [db]
  value_label: operation
  values: [read, write]
  query_ref: mssql_io_stall
# Built from the shared `mssql_io_stall` query: exports its `io_stall` column
# per database.
- metric_name: mssql_io_stall_total_seconds
  type: counter
  help: 'Total stall time in seconds per database.'
  key_labels: [db]
  values: [io_stall]
  query_ref: mssql_io_stall

#
# Collected from sys.dm_os_process_memory
#
# Exports the `resident_memory_bytes` column of the shared
# `mssql_process_memory` query.
- metric_name: mssql_resident_memory_bytes
  type: gauge
  help: 'SQL Server resident memory size (AKA working set).'
  values:
    - resident_memory_bytes
  query_ref: mssql_process_memory

# Exports the `virtual_memory_bytes` column of the shared
# `mssql_process_memory` query.
- metric_name: mssql_virtual_memory_bytes
  type: gauge
  help: 'SQL Server committed virtual memory size.'
  values:
    - virtual_memory_bytes
  query_ref: mssql_process_memory

# Exports the `memory_utilization_percentage` column of the shared
# `mssql_process_memory` query.
- metric_name: mssql_memory_utilization_percentage
  type: gauge
  help: 'The percentage of committed memory that is in the working set.'
  values:
    - memory_utilization_percentage
  query_ref: mssql_process_memory

# Exports the `page_fault_count` column of the shared `mssql_process_memory`
# query.
- metric_name: mssql_page_fault_count
  type: counter
  help: 'The number of page faults that were incurred by the SQL Server process.'
  values:
    - page_fault_count
  query_ref: mssql_process_memory

#
# Collected from sys.dm_os_sys_memory
#
# Physical memory in bytes (the kB columns are multiplied by 1024). `used` is
# total minus available; the two columns are told apart by the `state` label.
- metric_name: mssql_os_memory
  type: gauge
  help: 'OS physical memory, used and available.'
  value_label: state
  values:
    - used
    - available
  query: |
    SELECT
    (total_physical_memory_kb - available_physical_memory_kb) * 1024 AS used,
    available_physical_memory_kb * 1024 AS available
    FROM sys.dm_os_sys_memory

# Page file size in bytes (the kB columns are multiplied by 1024). `used` is
# total minus available; the two columns are told apart by the `state` label.
- metric_name: mssql_os_page_file
  type: gauge
  help: 'OS page file, used and available.'
  value_label: state
  values:
    - used
    - available
  query: |
    SELECT
    (total_page_file_kb - available_page_file_kb) * 1024 AS used,
    available_page_file_kb * 1024 AS available
    FROM sys.dm_os_sys_memory

# Shared queries, referenced from metric definitions via `query_ref`.
queries:
# Populates `mssql_io_stall_seconds` and `mssql_io_stall_total_seconds`.
#
# Stall times are summed over all files of a database and converted from
# milliseconds to seconds. The cast uses an explicit length of 128 (the maximum
# length of a database name): a bare `varchar` in CAST defaults to 30
# characters and would silently truncate longer names in the `db` label.
- query_name: mssql_io_stall
  query: |
    SELECT
      cast(DB_Name(a.database_id) as varchar(128)) AS [db],
      sum(io_stall_read_ms) / 1000.0 AS [read],
      sum(io_stall_write_ms) / 1000.0 AS [write],
      sum(io_stall) / 1000.0 AS io_stall
    FROM
      sys.dm_io_virtual_file_stats(null, null) a
      INNER JOIN sys.master_files b ON a.database_id = b.database_id AND a.file_id = b.file_id
    GROUP BY a.database_id

# Shared by `mssql_resident_memory_bytes`, `mssql_virtual_memory_bytes`,
# `mssql_memory_utilization_percentage` and `mssql_page_fault_count`.
# The two kB columns are multiplied by 1024 so they are reported in bytes.
- query_name: mssql_process_memory
  query: |
    SELECT
    physical_memory_in_use_kb * 1024 AS resident_memory_bytes,
    virtual_address_space_committed_kb * 1024 AS virtual_memory_bytes,
    memory_utilization_percentage,
    page_fault_count
    FROM sys.dm_os_process_memory

77 changes: 77 additions & 0 deletions kubedb/sql_exporter.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Exporter-wide settings and defaults.
global:
  # Scrape timing.
  scrape_timeout: 10s
  scrape_timeout_offset: 500ms
  min_interval: 0s
  # Connection-pool sizing (presumably per target — confirm against the
  # exporter documentation).
  max_connections: 3
  max_idle_connections: 3

# The target to monitor and the collectors to execute on it.
target:
  name: mssql_database
  # The data source name must be a URL whose scheme matches the driver name
  # (`sqlserver://...`); an ADO-style `server=...;user id=...` string carries
  # no scheme, so the driver cannot be determined from it.
  # NOTE(review): the `sa` credentials are committed in plain text. Prefer
  # injecting the DSN at deploy time (e.g. from a Kubernetes Secret) instead
  # of keeping a real password in version control.
  data_source_name: 'sqlserver://sa:Pa55w0rd!@ms-stand-monitor-0.ms-stand-monitor-pods.demo.svc?database=master&encrypt=true&TrustServerCertificate=true'
  collectors: [mssql_standard]
  enable_ping: true

# Collectors and metrics definitions
collectors:
  - collector_name: mssql_standard
    metrics:
      # Transaction log growth events per database; the '_Total' aggregate
      # instance is excluded. Every sample also carries the static `env` and
      # `region` labels.
      - metric_name: mssql_log_growths
        type: counter
        help: 'Total number of times the transaction log has been expanded since last restart, per database.'
        key_labels:
          - db
        static_labels:
          env: dev
          region: europe
        values: [counter]
        query: |
          SELECT rtrim(instance_name) AS db, cntr_value AS counter
          FROM sys.dm_os_performance_counters
          WHERE counter_name = 'Log Growths' AND instance_name <> '_Total'

      # Read and write stall columns of the shared `io_stall` query, told
      # apart by the `operation` label.
      - metric_name: mssql_io_stall_seconds
        type: counter
        help: 'Stall time in seconds per database and I/O operation.'
        key_labels:
          - db
        value_label: operation
        values:
          - io_stall_read
          - io_stall_write
        query_ref: io_stall

      # Total stall column of the shared `io_stall` query.
      - metric_name: mssql_io_stall_total_seconds
        type: counter
        help: 'Total stall time in seconds per database.'
        key_labels:
          - db
        values:
          - io_stall
        query_ref: io_stall

      # Info-style metric: constant value 1, with the server name carried in
      # the `hostname` label.
      - metric_name: mssql_hostname
        type: gauge
        help: 'Database server hostname'
        key_labels:
          - hostname
        static_value: 1
        query: |
          SELECT @@SERVERNAME AS hostname

    queries:
      # Stall times summed over all files of a database, converted from
      # milliseconds to seconds. The cast uses an explicit length of 128 (the
      # maximum length of a database name): a bare `varchar` in CAST defaults
      # to 30 characters and would silently truncate longer names.
      - query_name: io_stall
        query: |
          SELECT
            cast(DB_Name(a.database_id) as varchar(128)) AS db,
            sum(io_stall_read_ms) / 1000.0 AS io_stall_read,
            sum(io_stall_write_ms) / 1000.0 AS io_stall_write,
            sum(io_stall) / 1000.0 AS io_stall
          FROM
            sys.dm_io_virtual_file_stats(null, null) a
            INNER JOIN sys.master_files b ON a.database_id = b.database_id AND a.file_id = b.file_id
          GROUP BY a.database_id

# Glob patterns for additional collector definition files to load.
# NOTE(review): this pattern would also match `mssql_standard.collector.yml`,
# which declares the same collector name (`mssql_standard`) as the inline
# collector in this file — confirm the exporter accepts the duplicate.
collector_files:
  - '*.collector.yml'
Loading