From cd1533d79cfea02d005b458bd70fd9675846c9c6 Mon Sep 17 00:00:00 2001 From: lucasliang Date: Tue, 23 Apr 2024 15:28:49 +0800 Subject: [PATCH] engine: supply the metric of the compression ratio on write. (#358) * engine: add extra metrics on the compression ratio of write. Signed-off-by: lucasliang * Calibrate the version of grcov. Signed-off-by: lucasliang * Fix clippy errs. Signed-off-by: lucasliang * Fix format errs. Signed-off-by: lucasliang * Calibrate rustc version. Signed-off-by: lucasliang * Fix clippy errs. Signed-off-by: lucasliang * Fix clippy errors. Signed-off-by: lucasliang * Add change logs. Signed-off-by: lucasliang * Format ci actions. Signed-off-by: lucasliang * Fix ci errors. Signed-off-by: lucasliang * Polish rust.yml. Signed-off-by: lucasliang --------- Signed-off-by: lucasliang --- .github/workflows/rust.yml | 3 +-- CHANGELOG.md | 4 ++++ Cargo.toml | 30 ++++++++++++++---------------- ctl/Cargo.toml | 5 ++++- src/engine.rs | 3 ++- src/file_pipe_log/mod.rs | 2 +- src/log_batch.rs | 28 +++++++++++++++++----------- src/metrics.rs | 6 ++++++ src/util.rs | 14 ++++++++++---- 9 files changed, 59 insertions(+), 36 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index a6d2ee09..1ca6f12f 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -98,8 +98,7 @@ jobs: - name: Install grcov run: if [[ ! -e ~/.cargo/bin/grcov ]]; then cargo install --locked grcov; fi - name: Run tests - run: | - make test_matrix + run: make test_matrix env: RUSTFLAGS: '-Cinstrument-coverage' LLVM_PROFILE_FILE: '%p-%m.profraw' diff --git a/CHANGELOG.md b/CHANGELOG.md index 7531efab..fb0c9b30 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +### New Features + +* Add a new Prometheus metric `raft_engine_write_compression_ratio` to track compression ratio of write #358 + ## [0.4.2] - 2024-04-16 ### Behavior Changes diff --git a/Cargo.toml b/Cargo.toml index 6694e705..aa6669ed 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,7 +42,10 @@ hex = "0.4" if_chain = "1.0" lazy_static = "1.3" libc = "0.2" -log = { version = "0.4", features = ["max_level_trace", "release_max_level_debug"] } +log = { version = "0.4", features = [ + "max_level_trace", + "release_max_level_debug", +] } lz4-sys = "1.9" memmap2 = { version = "0.9", optional = true } nix = "0.26" @@ -64,8 +67,12 @@ thiserror = "1.0" criterion = "0.4" ctor = "0.2" env_logger = "0.10" -kvproto = { git = "https://github.com/pingcap/kvproto.git", default-features = false, features = ["protobuf-codec"] } -raft = { git = "https://github.com/tikv/raft-rs", branch = "master", default-features = false, features = ["protobuf-codec"] } +kvproto = { git = "https://github.com/pingcap/kvproto.git", default-features = false, features = [ + "protobuf-codec", +] } +raft = { git = "https://github.com/tikv/raft-rs", branch = "master", default-features = false, features = [ + "protobuf-codec", +] } rand = "0.8" rand_distr = "0.4" tempfile = "3.6" @@ -74,19 +81,10 @@ toml = "0.8" [features] default = ["internals", "scripting"] internals = [] -nightly = [ - "prometheus/nightly", -] -failpoints = [ - "fail/failpoints", -] -scripting = [ - "rhai", -] -swap = [ - "nightly", - "memmap2", -] +nightly = ["prometheus/nightly"] +failpoints = ["fail/failpoints"] +scripting = ["rhai"] +swap = ["nightly", "memmap2"] std_fs = [] nightly_group = ["nightly", "swap"] diff --git a/ctl/Cargo.toml b/ctl/Cargo.toml index 4c97eefb..2071705c 100644 --- a/ctl/Cargo.toml +++ b/ctl/Cargo.toml @@ -11,4 +11,7 @@ license = "Apache-2.0" [dependencies] clap = { version = "3.1", features = ["derive", "cargo"] } env_logger = "0.10" -raft-engine = { path = "..", version = "0.4.2", features = ["scripting", "internals"] } +raft-engine = { path = "..", version = "0.4.1", features = [ + "scripting", + "internals", +] } diff --git a/src/engine.rs b/src/engine.rs index 1a09b397..10416a29 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -142,7 +142,7 @@ where return Ok(0); } let start = Instant::now(); - let len = log_batch.finish_populate( + let (len, compression_ratio) = log_batch.finish_populate( self.cfg.batch_compression_threshold.0 as usize, self.cfg.compression_level, )?; @@ -225,6 +225,7 @@ where now = end; ENGINE_WRITE_DURATION_HISTOGRAM.observe(now.saturating_duration_since(start).as_secs_f64()); ENGINE_WRITE_SIZE_HISTOGRAM.observe(len as f64); + ENGINE_WRITE_COMPRESSION_RATIO_HISTOGRAM.observe(compression_ratio); Ok(len) } diff --git a/src/file_pipe_log/mod.rs b/src/file_pipe_log/mod.rs index 64042e01..c65515a3 100644 --- a/src/file_pipe_log/mod.rs +++ b/src/file_pipe_log/mod.rs @@ -218,7 +218,7 @@ pub mod debug { let log_file_format = LogFileContext::new(file_id, Version::default()); for batch in bs.iter_mut() { let offset = writer.offset() as u64; - let len = batch + let (len, _) = batch .finish_populate(1 /* compression_threshold */, None) .unwrap(); batch.prepare_write(&log_file_format).unwrap(); diff --git a/src/log_batch.rs b/src/log_batch.rs index c6ce147c..537609dc 100644 --- a/src/log_batch.rs +++ b/src/log_batch.rs @@ -767,28 +767,32 @@ impl LogBatch { &mut self, compression_threshold: usize, compression_level: Option, - ) -> Result { + ) -> Result<(usize, f64)> { let _t = StopWatch::new(perf_context!(log_populating_duration)); debug_assert!(self.buf_state == BufState::Open); if self.is_empty() { self.buf_state = BufState::Encoded(self.buf.len(), 0); - return Ok(0); + return Ok((0, 0.0)); } self.buf_state = BufState::Incomplete; // entries - let (header_offset, compression_type) = if compression_threshold > 0 + let (header_offset, compression_type, compression_ratio) = if compression_threshold > 0 && self.buf.len() >= LOG_BATCH_HEADER_LEN + compression_threshold { let buf_len = self.buf.len(); - lz4::append_compress_block( + let compression_ratio = lz4::append_compress_block( &mut self.buf, LOG_BATCH_HEADER_LEN, compression_level.unwrap_or(lz4::DEFAULT_LZ4_COMPRESSION_LEVEL), )?; - (buf_len - LOG_BATCH_HEADER_LEN, CompressionType::Lz4) + ( + buf_len - LOG_BATCH_HEADER_LEN, + CompressionType::Lz4, + compression_ratio, + ) } else { - (0, CompressionType::None) + (0, CompressionType::None, 0.0) }; // checksum @@ -830,7 +834,7 @@ impl LogBatch { } self.buf_state = BufState::Encoded(header_offset, footer_roffset - LOG_BATCH_HEADER_LEN); - Ok(self.buf.len() - header_offset) + Ok((self.buf.len() - header_offset, compression_ratio)) } /// Make preparations for the write of `LogBatch`. @@ -1328,7 +1332,7 @@ mod tests { offset: 0, }; let old_approximate_size = batch.approximate_size(); - let len = batch.finish_populate(usize::from(compress), None).unwrap(); + let (len, _) = batch.finish_populate(usize::from(compress), None).unwrap(); assert!(old_approximate_size >= len); assert_eq!(batch.approximate_size(), len); let mut batch_handle = mocked_file_block_handle; @@ -1493,7 +1497,7 @@ mod tests { batch1.merge(&mut batch2).unwrap(); assert!(batch2.is_empty()); - let len = batch1.finish_populate(0, None).unwrap(); + let (len, _) = batch1.finish_populate(0, None).unwrap(); batch1.prepare_write(&file_context).unwrap(); let encoded = batch1.encoded_bytes(); assert_eq!(len, encoded.len()); @@ -1549,7 +1553,8 @@ mod tests { offset: 0, }; let buf_len = batch.buf.len(); - let len = batch.finish_populate(1, None).unwrap(); + let (len, compression_ratio) = batch.finish_populate(1, None).unwrap(); + assert!(compression_ratio == 0.0); assert!(len == 0); assert_eq!(batch.buf_state, BufState::Encoded(buf_len, 0)); let file_context = LogFileContext::new(mocked_file_block_handles.id, Version::V2); @@ -1671,7 +1676,8 @@ mod tests { }, ]; let old_approximate_size = batch.approximate_size(); - let len = batch.finish_populate(1, None).unwrap(); + let (len, compression_ratio) = batch.finish_populate(1, None).unwrap(); + assert!(compression_ratio > 0.0); assert!(old_approximate_size >= len); assert_eq!(batch.approximate_size(), len); let checksum = batch.item_batch.checksum; diff --git a/src/metrics.rs b/src/metrics.rs index 6ca10940..3fcf692c 100644 --- a/src/metrics.rs +++ b/src/metrics.rs @@ -201,6 +201,12 @@ lazy_static! { exponential_buckets(256.0, 1.8, 22).unwrap() ) .unwrap(); + pub static ref ENGINE_WRITE_COMPRESSION_RATIO_HISTOGRAM: Histogram = register_histogram!( + "raft_engine_write_compression_ratio", + "Bucketed histogram of Raft Engine write compression ratio", + exponential_buckets(0.0005, 1.8, 16).unwrap() + ) + .unwrap(); pub static ref LOG_ALLOCATE_DURATION_HISTOGRAM: Histogram = register_histogram!( "raft_engine_allocate_log_duration_seconds", "Bucketed histogram of Raft Engine allocate log duration", diff --git a/src/util.rs b/src/util.rs index 2e35a83e..7e1d09c0 100644 --- a/src/util.rs +++ b/src/util.rs @@ -226,9 +226,10 @@ pub mod lz4 { pub const DEFAULT_LZ4_COMPRESSION_LEVEL: usize = 1; /// Compress content in `buf[skip..]`, and append output to `buf`. - pub fn append_compress_block(buf: &mut Vec, skip: usize, level: usize) -> Result<()> { + pub fn append_compress_block(buf: &mut Vec, skip: usize, level: usize) -> Result { let buf_len = buf.len(); let content_len = buf_len - skip; + let mut compression_ratio = 0.0; if content_len > 0 { if content_len > i32::MAX as usize { return Err(Error::InvalidArgument(format!( @@ -256,10 +257,11 @@ pub mod lz4 { if compressed == 0 { return Err(Error::Other(box_err!("Compression failed"))); } + compression_ratio = compressed as f64 / content_len as f64; buf.set_len(buf_len + 4 + compressed as usize); } } - Ok(()) + Ok(compression_ratio) } pub fn decompress_block(src: &[u8]) -> Result> { @@ -301,8 +303,12 @@ pub mod lz4 { let vecs: Vec> = vec![b"".to_vec(), b"123".to_vec(), b"12345678910".to_vec()]; for mut vec in vecs.into_iter() { let uncompressed_len = vec.len(); - super::append_compress_block(&mut vec, 0, super::DEFAULT_LZ4_COMPRESSION_LEVEL) - .unwrap(); + let compression_ratio = + super::append_compress_block(&mut vec, 0, super::DEFAULT_LZ4_COMPRESSION_LEVEL) + .unwrap(); + if uncompressed_len == 0 { + assert_eq!(compression_ratio, 0.0); + } let res = super::decompress_block(&vec[uncompressed_len..]).unwrap(); assert_eq!(res, vec[..uncompressed_len].to_owned()); }