Skip to content

Commit

Permalink
Hook up GNU debug link support to DwarfResolver
Browse files Browse the repository at this point in the history
This change adds debug link support to our DwarfResolver. Specifically,
when a DwarfResolver is created, we check whether the opened file
contains a debug link. If so, we try to find the target file it
represents in a list of known directories and then load the DWARF
information from there. If none of the directories contains the file,
the debug link will be silently ignored. The reason for this behavior is
that many distributions seem to distribute binaries with debug links in
them, but then make the linked debug information optional by having it
contained in a dedicated package.
There are many locations at which this functionality could fit in.
Considered candidates are: 1) ElfResolverData::elf_resolver(), 2)
ElfResolver itself, and 3) DwarfResolver (the approach taken). 1) is
somewhat nice because it keeps the existing resolver simple and to the
point. However, it is insufficient, because we expose ElfResolver
publicly and we most certainly want to enable debug link support for
users. Implementation inside ElfResolver is possible, but it seems
inferior to putting everything into DwarfResolver: we only intend to
follow debug links if DWARF support is enabled and eventually we could
consider checking both the original ELF file as well as the linkee ELF
file as fallback options when no symbol is found. As such, DwarfResolver
seems like the most apt location.

Refs: #60

Signed-off-by: Daniel Müller <[email protected]>
  • Loading branch information
d-e-s-o authored and danielocfb committed May 7, 2024
1 parent 6f42787 commit 4249bb7
Show file tree
Hide file tree
Showing 4 changed files with 221 additions and 11 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
IDs if `cache_build_ids` is `true`
- Added support for compressed debug information
- Added `zlib` (default enabled) and `zstd` (default disabled) features
- Added support for transparently following debug links in ELF binaries
- Adjusted `Inspector::for_each` signature to no longer carry explicit state
around
- Introduced `normalize::Reason` enum to provide best guess at why normalization
Expand Down
45 changes: 39 additions & 6 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,13 @@ use std::ffi::OsStr;
use std::ffi::OsString;
use std::fs::create_dir_all;
use std::fs::hard_link;
use std::fs::remove_file;
use std::fs::write;
use std::fs::File;
use std::io::Error;
use std::io::ErrorKind;
use std::io::Result;
use std::io::Write as _;
use std::ops::Deref;
use std::path::Path;
use std::path::PathBuf;
Expand Down Expand Up @@ -263,7 +267,6 @@ fn syms(_src: &Path, _dst: impl AsRef<OsStr>) {
/// Unpack an xz compressed file.
#[cfg(feature = "xz2")]
fn unpack_xz(src: &Path, dst: &Path) {
use std::fs::File;
use std::io::copy;
use xz2::read::XzDecoder;

Expand Down Expand Up @@ -295,8 +298,6 @@ fn unpack_xz(_src: &Path, _dst: &Path) {
#[cfg(feature = "zip")]
fn zip(files: &[PathBuf], dst: &Path) {
use std::fs::read as read_file;
use std::fs::File;
use std::io::Write as _;
use zip::write::SimpleFileOptions;
use zip::CompressionMethod;
use zip::ZipWriter;
Expand Down Expand Up @@ -362,6 +363,16 @@ fn cc_stable_addrs(dst: impl AsRef<OsStr>, options: &[&str]) {
cc(&src, dst, &args)
}

/// Append `data` to the file at `path`.
///
/// The file is opened in append mode and closed again before
/// `adjust_mtime` is invoked on `path`.
///
/// # Panics
/// Panics if `adjust_mtime` reports an error.
fn append(path: &Path, data: &[u8]) -> Result<()> {
    let mut handle = File::options().append(true).open(path)?;
    let () = handle.write_all(data)?;
    // Make sure the file is closed before its modification time is
    // touched.
    let () = drop(handle);

    let () = adjust_mtime(path).unwrap();
    Ok(())
}

/// Prepare the various test files.
fn prepare_test_files() {
let data_dir = data_dir();
Expand Down Expand Up @@ -487,6 +498,31 @@ fn prepare_test_files() {
],
);

dwarf(&src, "test-stable-addrs-dwarf-only-wrong-crc.dbg");
let dbg = data_dir.join("test-stable-addrs-dwarf-only-wrong-crc.dbg");
objcopy(
&src,
"test-stable-addrs-stripped-with-link-to-wrong-crc.bin",
&[
"--strip-all",
&format!("--add-gnu-debuglink={}", dbg.display()),
],
);
append(&dbg, &[0]).unwrap();


let dbg = data_dir.join("test-stable-addrs-dwarf-only-non-existent.dbg");
let () = write(&dbg, [0]).unwrap();
objcopy(
&src,
"test-stable-addrs-stripped-with-link-non-existent.bin",
&[
"--strip-all",
&format!("--add-gnu-debuglink={}", dbg.display()),
],
);
let () = remove_file(&dbg).unwrap();

let src = data_dir.join("kallsyms.xz");
let mut dst = src.clone();
assert!(dst.set_extension(""));
Expand Down Expand Up @@ -519,9 +555,6 @@ fn prepare_test_files() {
/// Download a multi-part file split into `part_count` pieces.
#[cfg(feature = "reqwest")]
fn download_multi_part(base_url: &reqwest::Url, part_count: usize, dst: &Path) {
use std::fs::File;
use std::io::Write as _;

let mut dst = File::create(dst).unwrap();
for part in 1..=part_count {
let url = reqwest::Url::parse(&format!("{}.part{part}", base_url.as_str())).unwrap();
Expand Down
149 changes: 144 additions & 5 deletions src/dwarf/resolver.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
use std::borrow::Cow;
#[cfg(test)]
use std::env;
use std::ffi::OsStr;
use std::fmt::Debug;
use std::fmt::Formatter;
use std::fmt::Result as FmtResult;
use std::mem;
use std::mem::swap;
use std::ops::Deref as _;
#[cfg(test)]
use std::path::Path;
use std::path::PathBuf;
use std::rc::Rc;

use gimli::AbbreviationsCacheStrategy;
Expand All @@ -19,6 +20,8 @@ use crate::error::IntoCowStr;
use crate::inspect::FindAddrOpts;
use crate::inspect::Inspect;
use crate::inspect::SymInfo;
use crate::log::debug;
use crate::log::warn;
use crate::symbolize::CodeInfo;
use crate::symbolize::FindSymOpts;
use crate::symbolize::InlinedFn;
Expand All @@ -29,9 +32,12 @@ use crate::symbolize::Symbolize;
use crate::Addr;
use crate::Error;
use crate::ErrorExt;
use crate::Mmap;
use crate::Result;
use crate::SymType;

use super::debug_link::debug_link_crc32;
use super::debug_link::read_debug_link;
use super::function::Function;
use super::location::Location;
use super::reader;
Expand Down Expand Up @@ -75,15 +81,121 @@ impl From<Option<gimli::DwLang>> for SrcLang {
}


/// Find a debug file in a list of default directories.
///
/// `file` is the name of the debug file to look for (as recorded in a
/// debug link). `linker` is the path to the file containing the debug
/// link. This function searches a couple of "well-known" locations and
/// then others constructed based on the canonicalized path of `linker`.
///
/// Returns the first candidate path that refers to a regular file, or
/// `None` if no candidate does.
///
/// # Notes
/// This function ignores any errors encountered.
// TODO: Ideally this discovery functionality would be provided in the
//       form of an iterator for better testability.
fn find_debug_file(file: &OsStr, linker: Option<&Path>) -> Option<PathBuf> {
    macro_rules! return_if_exists {
        ($path:ident) => {
            // Only accept regular files (symbolic links are followed). A
            // directory that merely shares the debug file's name cannot
            // contain debug information and reporting it as a match
            // would make the caller fail instead of ignoring the link.
            if $path.is_file() {
                debug!("found debug info at {}", $path.display());
                return Some($path)
            }
        };
    }

    // First check known fixed locations.
    let path = Path::new("/lib/debug/").join(file);
    return_if_exists!(path);

    let path = Path::new("/usr/lib/debug/").join(file);
    return_if_exists!(path);

    // Next check others that depend on the absolute `linker` (which may
    // not be retrievable). E.g., assuming `linker` is `/usr/lib64/libc.so` and
    // `file` is `libc.so.debug`, it would also search:
    // - /usr/lib64/libc.so.debug
    // - /usr/lib/debug/usr/lib64/libc.so.debug
    // - /usr/lib/debug/usr/libc.so.debug

    // TODO: Different heuristics may be possible here. E.g., honor
    //       .debug directories and check the current working directory
    //       (??). Also, users could want to pass in a directory.
    if let Some(linker) = linker {
        // A failure to canonicalize is deliberately ignored; we just
        // have fewer candidates to check in that case.
        if let Ok(mut path) = linker.canonicalize() {
            // Look for the debug file right next to `linker`.
            let () = path.set_file_name(file);
            return_if_exists!(path);

            let mut ancestors = path.ancestors();
            // Remove the file name, as we will always append it anyway.
            let _ = ancestors.next();

            for ancestor in ancestors {
                let mut components = ancestor.components();
                // Remove the root directory to make the path relative. That
                // allows for joining to work as expected.
                let _ = components.next();

                // If the remaining path is empty we'd basically just cover
                // one of the "fixed" cases above, so we can stop.
                if components.as_path().as_os_str().is_empty() {
                    break
                }

                let path = Path::new("/usr/lib/debug/")
                    .join(components.as_path())
                    .join(file);
                return_if_exists!(path);
            }
        }
    }
    None
}


/// Attempt to follow the debug link contained in the file represented
/// by `parser`.
///
/// Returns `Ok(None)` if `parser` reports no debug link or if the
/// link's destination could not be found (a warning is logged in the
/// latter case). An error is returned if the destination cannot be
/// opened or if its checksum differs from the one stored in the link.
fn try_deref_debug_link(parser: &ElfParser) -> Result<Option<Rc<ElfParser>>> {
    let (file, checksum) = match read_debug_link(parser)? {
        Some(link) => link,
        // No debug link present; nothing to follow.
        None => return Ok(None),
    };

    let path = match find_debug_file(file, parser.path()) {
        Some(path) => path,
        None => {
            warn!(
                "debug link references destination `{}` which was not found in any known location",
                Path::new(file).display(),
            );
            return Ok(None)
        }
    };

    let mmap = Mmap::builder().open(&path).with_context(|| {
        format!("failed to open debug link destination `{}`", path.display())
    })?;

    // The link carries the checksum of the file it refers to; refuse
    // to use a destination whose contents do not match.
    let crc = debug_link_crc32(&mmap);
    if crc != checksum {
        return Err(Error::with_invalid_data(format!(
            "debug link destination `{}` checksum does not match \
             expected one: {crc:x} (actual) != {checksum:x} (expected)",
            path.display()
        )))
    }

    let linkee = ElfParser::from_mmap(mmap, Some(path));
    Ok(Some(Rc::new(linkee)))
}


/// DwarfResolver provides abilities to query DWARF information of binaries.
pub(crate) struct DwarfResolver {
/// The lazily parsed compilation units of the DWARF file.
// SAFETY: We must not hand out references with a 'static lifetime to
// this member. Rather, they should never outlive `self`.
// Furthermore, this member has to be listed before `parser`
// and `_linkee_parser` (struct fields are dropped in
// declaration order) to make sure we never end up with a
// dangling reference.
units: Units<'static>,
/// The parser for the file this resolver was created for.
parser: Rc<ElfParser>,
/// If the source file contains a valid debug link, this parser
/// represents the file the link refers to.
_linkee_parser: Option<Rc<ElfParser>>,
}

impl DwarfResolver {
Expand All @@ -93,11 +205,16 @@ impl DwarfResolver {
}

pub fn from_parser(parser: Rc<ElfParser>) -> Result<Self, Error> {
let linkee_parser = try_deref_debug_link(&parser)?;

// SAFETY: We own the `ElfParser` and make sure that it stays
// around while the `Units` object uses it. As such, it
// is fine to conjure a 'static lifetime here.
let static_parser =
unsafe { mem::transmute::<&ElfParser, &'static ElfParser>(parser.deref()) };
let static_parser = unsafe {
mem::transmute::<&ElfParser, &'static ElfParser>(
linkee_parser.as_ref().unwrap_or(&parser).deref(),
)
};
let mut load_section = |section| reader::load_section(static_parser, section);
let mut dwarf = Dwarf::load(&mut load_section)?;
// Cache abbreviations (which will cause them to be
Expand All @@ -108,7 +225,11 @@ impl DwarfResolver {
let () = dwarf.populate_abbreviations_cache(AbbreviationsCacheStrategy::Duplicates);

let units = Units::parse(dwarf)?;
let slf = Self { units, parser };
let slf = Self {
units,
parser,
_linkee_parser: linkee_parser,
};
Ok(slf)
}

Expand Down Expand Up @@ -367,6 +488,24 @@ mod tests {
assert_eq!(format!("{err:#}"), format!("failed to read: {inner}"));
}

/// Check that a debug link is followed to the expected debug file.
#[test]
fn debug_link_resolution() {
    let data_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("data");

    let linker_path = data_dir.join("test-stable-addrs-stripped-with-link.bin");
    let resolver = DwarfResolver::open(&linker_path).unwrap();
    let linkee = resolver._linkee_parser.as_ref();
    assert!(linkee.is_some());

    // The linked debug file is expected to reside next to the binary
    // containing the link.
    let linkee_path = data_dir.join("test-stable-addrs-dwarf-only.dbg");
    assert_eq!(linkee.unwrap().path(), Some(linkee_path.as_path()));
}

/// Check that we can find the source code location of an address.
#[test]
fn source_location_finding() {
Expand Down
37 changes: 37 additions & 0 deletions tests/blazesym.rs
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,7 @@ fn symbolize_dwarf_gsym_inlined() {

for file in [
"test-stable-addrs-stripped-elf-with-dwarf.bin",
"test-stable-addrs-stripped-with-link.bin",
"test-stable-addrs-compressed-debug-zlib.bin",
#[cfg(feature = "zstd")]
"test-stable-addrs-compressed-debug-zstd.bin",
Expand All @@ -366,6 +367,42 @@ fn symbolize_dwarf_gsym_inlined() {
}
}

/// Make sure that symbolization reports an error when the checksum of
/// the linked debug information does not match the debug link.
#[test]
fn symbolize_dwarf_wrong_debug_link_crc() {
    let data_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("data");
    let binary = data_dir.join("test-stable-addrs-stripped-with-link-to-wrong-crc.bin");
    let elf = symbolize::Elf::new(binary);
    let src = symbolize::Source::from(elf);
    let input = symbolize::Input::VirtOffset(0x2000100);

    let symbolizer = Symbolizer::new();
    let err = symbolizer.symbolize_single(&src, input).unwrap_err();
    let message = err.to_string();
    assert!(
        message.contains("checksum does not match expected one"),
        "{err:?}"
    );
}

/// Check that a debug link whose destination does not exist is not
/// treated as an error.
#[test]
fn symbolize_dwarf_non_existent_debug_link() {
    let data_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("data");
    let binary = data_dir.join("test-stable-addrs-stripped-with-link-non-existent.bin");
    let elf = symbolize::Elf::new(binary);
    let src = symbolize::Source::from(elf);
    let input = symbolize::Input::VirtOffset(0x2000100);

    let symbolizer = Symbolizer::builder().enable_auto_reload(false).build();
    let symbolized = symbolizer.symbolize_single(&src, input).unwrap();
    let result = symbolized.into_sym();
    // Because the binary is stripped, we don't expect any symbol
    // resolution.
    assert_eq!(result, None);
}

/// Make sure that we report (enabled) or don't report (disabled) inlined
/// functions with Breakpad sources.
#[test]
Expand Down

0 comments on commit 4249bb7

Please sign in to comment.