From 4249bb7f14d4baebb57093c97d819053199240b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20M=C3=BCller?= Date: Tue, 30 Apr 2024 13:23:51 -0700 Subject: [PATCH] Hook up GNU debug link support to DwarfResolver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change adds debug link support to our DwarfResolver. Specifically, when a DwarfResolver is created, we check whether the opened file contains a debug link. If so, we try to find the target file it represents in a list of known directories and then load the DWARF information from there. If none of the directories contains the file, the debug link will be silently ignored. The reason for this behavior is that many distributions seem to distribute binaries with debug links in them, but then make the linked debug information optional by having it contained in a dedicated package. There are many locations at which this functionality could fit in. Considered candidates are: 1) ElfResolverData::elf_resolver(), 2) ElfResolver itself, and 3) DwarfResolver (the approach taken). 1) is somewhat nice because it keeps the existing resolver simple and to the point. However, it is insufficient, because we expose ElfResolver publicly and we most certainly want to enable debug link support for users. Implementation inside ElfResolver is possible, but it seems inferior to putting everything into DwarfResolver: we only intend to follow debug links if DWARF support is enabled and eventually we could consider checking both the original ELF file as well as the linkee ELF file as fallback options when no symbol is found. As such, DwarfResolver seems like the most apt location. 
Refs: #60 Signed-off-by: Daniel Müller --- CHANGELOG.md | 1 + build.rs | 45 +++++++++++-- src/dwarf/resolver.rs | 149 ++++++++++++++++++++++++++++++++++++++++-- tests/blazesym.rs | 37 +++++++++++ 4 files changed, 221 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a1b87b1f..edbc43d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ IDs if `cache_build_ids` is `true` - Added support for compressed debug information - Added `zlib` (default enabled) and `zstd` (default disabled) features +- Added support for transparently following debug links in ELF binaries - Adjusted `Inspector::for_each` signature to no longer carry explicit state around - Introduced `normalize::Reason` enum to provide best guess at why normalization diff --git a/build.rs b/build.rs index e2f65861..dcacb051 100644 --- a/build.rs +++ b/build.rs @@ -5,9 +5,13 @@ use std::ffi::OsStr; use std::ffi::OsString; use std::fs::create_dir_all; use std::fs::hard_link; +use std::fs::remove_file; +use std::fs::write; +use std::fs::File; use std::io::Error; use std::io::ErrorKind; use std::io::Result; +use std::io::Write as _; use std::ops::Deref; use std::path::Path; use std::path::PathBuf; @@ -263,7 +267,6 @@ fn syms(_src: &Path, _dst: impl AsRef) { /// Unpack an xz compressed file. #[cfg(feature = "xz2")] fn unpack_xz(src: &Path, dst: &Path) { - use std::fs::File; use std::io::copy; use xz2::read::XzDecoder; @@ -295,8 +298,6 @@ fn unpack_xz(_src: &Path, _dst: &Path) { #[cfg(feature = "zip")] fn zip(files: &[PathBuf], dst: &Path) { use std::fs::read as read_file; - use std::fs::File; - use std::io::Write as _; use zip::write::SimpleFileOptions; use zip::CompressionMethod; use zip::ZipWriter; @@ -362,6 +363,16 @@ fn cc_stable_addrs(dst: impl AsRef, options: &[&str]) { cc(&src, dst, &args) } +/// Open the file at `path` for writing and append `data` to it. 
+fn append(path: &Path, data: &[u8]) -> Result<()> { + { + let mut file = File::options().append(true).open(path)?; + let () = file.write_all(data)?; + } + let () = adjust_mtime(path).unwrap(); + Ok(()) +} + /// Prepare the various test files. fn prepare_test_files() { let data_dir = data_dir(); @@ -487,6 +498,31 @@ fn prepare_test_files() { ], ); + dwarf(&src, "test-stable-addrs-dwarf-only-wrong-crc.dbg"); + let dbg = data_dir.join("test-stable-addrs-dwarf-only-wrong-crc.dbg"); + objcopy( + &src, + "test-stable-addrs-stripped-with-link-to-wrong-crc.bin", + &[ + "--strip-all", + &format!("--add-gnu-debuglink={}", dbg.display()), + ], + ); + append(&dbg, &[0]).unwrap(); + + + let dbg = data_dir.join("test-stable-addrs-dwarf-only-non-existent.dbg"); + let () = write(&dbg, [0]).unwrap(); + objcopy( + &src, + "test-stable-addrs-stripped-with-link-non-existent.bin", + &[ + "--strip-all", + &format!("--add-gnu-debuglink={}", dbg.display()), + ], + ); + let () = remove_file(&dbg).unwrap(); + let src = data_dir.join("kallsyms.xz"); let mut dst = src.clone(); assert!(dst.set_extension("")); @@ -519,9 +555,6 @@ fn prepare_test_files() { /// Download a multi-part file split into `part_count` pieces. 
#[cfg(feature = "reqwest")] fn download_multi_part(base_url: &reqwest::Url, part_count: usize, dst: &Path) { - use std::fs::File; - use std::io::Write as _; - let mut dst = File::create(dst).unwrap(); for part in 1..=part_count { let url = reqwest::Url::parse(&format!("{}.part{part}", base_url.as_str())).unwrap(); diff --git a/src/dwarf/resolver.rs b/src/dwarf/resolver.rs index d629c9b3..61733c86 100644 --- a/src/dwarf/resolver.rs +++ b/src/dwarf/resolver.rs @@ -1,14 +1,15 @@ use std::borrow::Cow; #[cfg(test)] use std::env; +use std::ffi::OsStr; use std::fmt::Debug; use std::fmt::Formatter; use std::fmt::Result as FmtResult; use std::mem; use std::mem::swap; use std::ops::Deref as _; -#[cfg(test)] use std::path::Path; +use std::path::PathBuf; use std::rc::Rc; use gimli::AbbreviationsCacheStrategy; @@ -19,6 +20,8 @@ use crate::error::IntoCowStr; use crate::inspect::FindAddrOpts; use crate::inspect::Inspect; use crate::inspect::SymInfo; +use crate::log::debug; +use crate::log::warn; use crate::symbolize::CodeInfo; use crate::symbolize::FindSymOpts; use crate::symbolize::InlinedFn; @@ -29,9 +32,12 @@ use crate::symbolize::Symbolize; use crate::Addr; use crate::Error; use crate::ErrorExt; +use crate::Mmap; use crate::Result; use crate::SymType; +use super::debug_link::debug_link_crc32; +use super::debug_link::read_debug_link; use super::function::Function; use super::location::Location; use super::reader; @@ -75,15 +81,121 @@ impl From> for SrcLang { } +/// Find a debug file in a list of default directories. +/// +/// `linker` is the path to the file containing the debug link. This function +/// searches a couple of "well-known" locations and then others constructed +/// based on the canonicalized path of `linker`. +/// +/// # Notes +/// This function ignores any errors encountered. +// TODO: Ideally this discovery functionality would be provided in the +// form of an iterator for better testability. 
+fn find_debug_file(file: &OsStr, linker: Option<&Path>) -> Option { + macro_rules! return_if_exists { + ($path:ident) => { + if $path.exists() { + debug!("found debug info at {}", $path.display()); + return Some($path) + } + }; + } + + // First check known fixed locations. + let path = Path::new("/lib/debug/").join(file); + return_if_exists!(path); + + let path = Path::new("/usr/lib/debug/").join(file); + return_if_exists!(path); + + // Next check others that depend on the absolute `linker` (which may + // not be retrievable). E.g., assuming `linker` is `/usr/lib64/libc.so` and + // `file` is `libc.so.debug`, it would also search: + // - /usr/lib64/libc.so.debug + // - /usr/lib/debug/usr/lib64/libc.so.debug + // - /usr/lib/debug/usr/libc.so.debug + + // TODO: Different heuristics may be possible here. E.g., honor + // .debug directories and check the current working directory + // (??). Also, users could want to pass in a directory. + if let Some(linker) = linker { + if let Ok(mut path) = linker.canonicalize() { + let () = path.set_file_name(file); + return_if_exists!(path); + + let mut ancestors = path.ancestors(); + // Remove the file name, as we will always append it anyway. + let _ = ancestors.next(); + + for ancestor in ancestors { + let mut components = ancestor.components(); + // Remove the root directory to make the path relative. That + // allows for joining to work as expected. + let _ = components.next(); + + // If the remaining path is empty we'd basically just cover + // one of the "fixed" cases above, so we can stop. + if components.as_path().as_os_str().is_empty() { + break + } + + let path = Path::new("/usr/lib/debug/") + .join(components.as_path()) + .join(file); + return_if_exists!(path); + } + } + } + None +} + + +fn try_deref_debug_link(parser: &ElfParser) -> Result>> { + if let Some((file, checksum)) = read_debug_link(parser)? 
{ + match find_debug_file(file, parser.path()) { + Some(path) => { + let mmap = Mmap::builder().open(&path).with_context(|| { + format!("failed to open debug link destination `{}`", path.display()) + })?; + let crc = debug_link_crc32(&mmap); + if crc != checksum { + return Err(Error::with_invalid_data(format!( + "debug link destination `{}` checksum does not match \ + expected one: {crc:x} (actual) != {checksum:x} (expected)", + path.display() + ))) + } + + let dst_parser = Rc::new(ElfParser::from_mmap(mmap, Some(path))); + Ok(Some(dst_parser)) + } + None => { + warn!( + "debug link references destination `{}` which was not found in any known location", + Path::new(file).display(), + ); + Ok(None) + } + } + } else { + Ok(None) + } +} + + /// DwarfResolver provides abilities to query DWARF information of binaries. pub(crate) struct DwarfResolver { /// The lazily parsed compilation units of the DWARF file. // SAFETY: We must not hand out references with a 'static lifetime to // this member. Rather, they should never outlive `self`. // Furthermore, this member has to be listed before `parser` - // to make sure we never end up with a dangling reference. + // and `_linkee_parser` to make sure we never end up with a + // dangling reference. units: Units<'static>, parser: Rc, + /// If the source file contains a valid debug link, this parser + /// represents it. + _linkee_parser: Option>, } impl DwarfResolver { @@ -93,11 +205,16 @@ impl DwarfResolver { } pub fn from_parser(parser: Rc) -> Result { + let linkee_parser = try_deref_debug_link(&parser)?; + // SAFETY: We own the `ElfParser` and make sure that it stays // around while the `Units` object uses it. As such, it // is fine to conjure a 'static lifetime here. 
- let static_parser = - unsafe { mem::transmute::<&ElfParser, &'static ElfParser>(parser.deref()) }; + let static_parser = unsafe { + mem::transmute::<&ElfParser, &'static ElfParser>( + linkee_parser.as_ref().unwrap_or(&parser).deref(), + ) + }; let mut load_section = |section| reader::load_section(static_parser, section); let mut dwarf = Dwarf::load(&mut load_section)?; // Cache abbreviations (which will cause them to be @@ -108,7 +225,11 @@ impl DwarfResolver { let () = dwarf.populate_abbreviations_cache(AbbreviationsCacheStrategy::Duplicates); let units = Units::parse(dwarf)?; - let slf = Self { units, parser }; + let slf = Self { + units, + parser, + _linkee_parser: linkee_parser, + }; Ok(slf) } @@ -367,6 +488,24 @@ mod tests { assert_eq!(format!("{err:#}"), format!("failed to read: {inner}")); } + /// Check that we resolve debug links correctly. + #[test] + fn debug_link_resolution() { + let path = Path::new(&env!("CARGO_MANIFEST_DIR")) + .join("data") + .join("test-stable-addrs-stripped-with-link.bin"); + let resolver = DwarfResolver::open(&path).unwrap(); + assert!(resolver._linkee_parser.is_some()); + + let linkee_path = Path::new(&env!("CARGO_MANIFEST_DIR")) + .join("data") + .join("test-stable-addrs-dwarf-only.dbg"); + assert_eq!( + resolver._linkee_parser.as_ref().unwrap().path(), + Some(linkee_path.as_path()) + ); + } + /// Check that we can find the source code location of an address. 
#[test] fn source_location_finding() { diff --git a/tests/blazesym.rs b/tests/blazesym.rs index 5e883882..6ae20d8c 100644 --- a/tests/blazesym.rs +++ b/tests/blazesym.rs @@ -353,6 +353,7 @@ fn symbolize_dwarf_gsym_inlined() { for file in [ "test-stable-addrs-stripped-elf-with-dwarf.bin", + "test-stable-addrs-stripped-with-link.bin", "test-stable-addrs-compressed-debug-zlib.bin", #[cfg(feature = "zstd")] "test-stable-addrs-compressed-debug-zstd.bin", @@ -366,6 +367,42 @@ fn symbolize_dwarf_gsym_inlined() { } } +/// Make sure that we fail loading linked debug information on CRC +/// mismatch. +#[test] +fn symbolize_dwarf_wrong_debug_link_crc() { + let path = Path::new(&env!("CARGO_MANIFEST_DIR")) + .join("data") + .join("test-stable-addrs-stripped-with-link-to-wrong-crc.bin"); + let src = symbolize::Source::from(symbolize::Elf::new(path)); + let symbolizer = Symbolizer::new(); + let err = symbolizer + .symbolize_single(&src, symbolize::Input::VirtOffset(0x2000100)) + .unwrap_err(); + assert!( + err.to_string() + .contains("checksum does not match expected one"), + "{err:?}" + ); +} + +/// Check that we do not error out when a debug link does not exist. +#[test] +fn symbolize_dwarf_non_existent_debug_link() { + let path = Path::new(&env!("CARGO_MANIFEST_DIR")) + .join("data") + .join("test-stable-addrs-stripped-with-link-non-existent.bin"); + let src = symbolize::Source::from(symbolize::Elf::new(path)); + let symbolizer = Symbolizer::builder().enable_auto_reload(false).build(); + let result = symbolizer + .symbolize_single(&src, symbolize::Input::VirtOffset(0x2000100)) + .unwrap() + .into_sym(); + // Because the binary is stripped, we don't expect any symbol + // resolution. + assert_eq!(result, None); +} + /// Make sure that we report (enabled) or don't report (disabled) inlined /// functions with Breakpad sources. #[test]