From b9f15cc7f2b8881fe0c9e5b772962d1797908bff Mon Sep 17 00:00:00 2001 From: Benjamin Gilbert Date: Sat, 18 Sep 2021 12:48:15 -0400 Subject: [PATCH 1/7] iso9660: use Read.take() instead of LimitReader std already has a mechanism to make a reader return Ok(0) when a defined limit is reached, so let's just use it. --- src/iso9660.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/iso9660.rs b/src/iso9660.rs index 146ad6af3..0a0bacbe8 100644 --- a/src/iso9660.rs +++ b/src/iso9660.rs @@ -34,7 +34,7 @@ use anyhow::{anyhow, bail, Context, Result}; use bytes::{Buf, Bytes}; use serde::Serialize; -use crate::io::{LimitReader, BUFFER_SIZE}; +use crate::io::BUFFER_SIZE; // technically the standard supports others, but this is the only one we support const ISO9660_SECTOR_SIZE: usize = 2048; @@ -169,7 +169,7 @@ impl IsoFs { .with_context(|| format!("seeking to file {}", file.name))?; Ok(BufReader::with_capacity( BUFFER_SIZE, - LimitReader::new(&self.file, file.length as u64, None), + (&self.file).take(file.length as u64), )) } From 6d2b9f42a5ee174ef54339ea17698da453fcb0fb Mon Sep 17 00:00:00 2001 From: Benjamin Gilbert Date: Sat, 18 Sep 2021 12:48:30 -0400 Subject: [PATCH 2/7] Revert "io: make LimitReader conflict optional" Read.take() already handles the case where we just want to return Ok(0), so let's just use that when we need it. This reverts commit da13fe03ad53aab1838f31dbd9a6ab34757039bf. --- src/download.rs | 4 +--- src/io.rs | 41 +++++++++++++++-------------------------- 2 files changed, 16 insertions(+), 29 deletions(-) diff --git a/src/download.rs b/src/download.rs index 9e8aef464..8814113f0 100644 --- a/src/download.rs +++ b/src/download.rs @@ -230,9 +230,7 @@ where let byte_limit = saved.map(|saved| saved.get_offset()).transpose()?.flatten(); let mut limit_reader: Box = match byte_limit { None => Box::new(decompress_reader), - Some((limit, conflict)) => { - Box::new(LimitReader::new(decompress_reader, limit, Some(conflict))) - } + Some((limit, conflict)) => Box::new(LimitReader::new(decompress_reader, limit, conflict)), }; // Read the first MiB of input and, if requested, check it against the diff --git a/src/io.rs b/src/io.rs index eedc2ab17..54ed881c3 100644 --- a/src/io.rs +++ b/src/io.rs @@ -217,11 +217,11 @@ pub struct LimitReader { source: R, length: u64, remaining: u64, - conflict: Option, + conflict: String, } impl LimitReader { - pub fn new(source: R, length: u64, conflict: Option) -> Self { + pub fn new(source: R, length: u64, conflict: String) -> Self { Self { source, length, @@ -238,17 +238,14 @@ impl Read for LimitReader { } let allowed = self.remaining.min(buf.len() as u64); if allowed == 0 { - return match self.conflict { - None => return Ok(0), - // reached the limit; only error if we're not at EOF - Some(ref msg) => match self.source.read(&mut buf[..1]) { - Ok(0) => Ok(0), - Ok(_) => Err(io::Error::new( - io::ErrorKind::Other, - format!("collision with {} at offset {}", msg, self.length), - )), - Err(e) => Err(e), - }, + // reached the limit; only error if we're not at EOF + return match self.source.read(&mut buf[..1]) { + Ok(0) => Ok(0), + Ok(_) => Err(io::Error::new( + io::ErrorKind::Other, + format!("collision with {} at offset {}", self.conflict, self.length), + )), + Err(e) => Err(e), }; } let count = self.source.read(&mut buf[..allowed as usize])?; @@ -478,7 +475,7 @@ mod tests { // limit larger than file let mut file = Cursor::new(data.clone()); - let mut lim = LimitReader::new(&mut file, 150, Some("foo".into())); + let mut lim = LimitReader::new(&mut file, 150, "foo".into()); let mut buf = [0u8; 60]; assert_eq!(lim.read(&mut buf).unwrap(), 60); assert_eq!(buf[..], data[0..60]); @@ -488,7 +485,7 @@ mod tests { // limit exactly equal to file let mut file = Cursor::new(data.clone()); - let mut lim = LimitReader::new(&mut file, 100, Some("foo".into())); + let mut lim = LimitReader::new(&mut file, 100, "foo".into()); let mut buf = [0u8; 60]; assert_eq!(lim.read(&mut buf).unwrap(), 60); assert_eq!(buf[..], data[0..60]); @@ -498,7 +495,7 @@ mod tests { // buffer smaller than limit let mut file = Cursor::new(data.clone()); - let mut lim = LimitReader::new(&mut file, 90, Some("foo".into())); + let mut lim = LimitReader::new(&mut file, 90, "foo".into()); let mut buf = [0u8; 60]; assert_eq!(lim.read(&mut buf).unwrap(), 60); assert_eq!(buf[..], data[0..60]); @@ -511,7 +508,7 @@ mod tests { // buffer exactly equal to limit let mut file = Cursor::new(data.clone()); - let mut lim = LimitReader::new(&mut file, 60, Some("foo".into())); + let mut lim = LimitReader::new(&mut file, 60, "foo".into()); let mut buf = [0u8; 60]; assert_eq!(lim.read(&mut buf).unwrap(), 60); assert_eq!(buf[..], data[0..60]); @@ -522,7 +519,7 @@ mod tests { // buffer larger than limit let mut file = Cursor::new(data.clone()); - let mut lim = LimitReader::new(&mut file, 50, Some("foo".into())); + let mut lim = LimitReader::new(&mut file, 50, "foo".into()); let mut buf = [0u8; 60]; assert_eq!(lim.read(&mut buf).unwrap(), 50); assert_eq!(buf[..50], data[0..50]); @@ -530,13 +527,5 @@ mod tests { lim.read(&mut buf).unwrap_err().to_string(), "collision with foo at offset 50" ); - - // test no collision - let mut file = Cursor::new(data.clone()); - let mut lim = LimitReader::new(&mut file, 50, None); - let mut buf = [0u8; 60]; - assert_eq!(lim.read(&mut buf).unwrap(), 50); - assert_eq!(buf[..50], data[0..50]); - assert_eq!(lim.read(&mut buf).unwrap(), 0); } } From b68404516f631b8a6127ed331a1f63236b97c40b Mon Sep 17 00:00:00 2001 From: Benjamin Gilbert Date: Sat, 18 Sep 2021 01:20:27 -0400 Subject: [PATCH 3/7] iso9660: convert get_dir/file into try_into methods on DirectoryRecord It's a bit more verbose but seems more composable. --- src/iso9660.rs | 46 +++++++++++++++++++++++----------------------- src/live.rs | 2 +- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/src/iso9660.rs b/src/iso9660.rs index 0a0bacbe8..9a51aa7e5 100644 --- a/src/iso9660.rs +++ b/src/iso9660.rs @@ -105,13 +105,13 @@ impl IsoFs { for component in parent_dir.components() { if let std::path::Component::Normal(c) = component { let c = c.to_str().unwrap(); // `path` is &str - match self.get_dir_record(&dir, c)? { - Some(DirectoryRecord::Directory(d)) => dir = d, - Some(DirectoryRecord::File(_)) => { - bail!("component {:?} in path {} is not a directory", c, path) - } - None => bail!("intermediate directory {} does not exist", c), - } + dir = self + .get_dir_record(&dir, c)? + .ok_or_else(|| anyhow!("intermediate directory {} does not exist", c))? + .try_into_dir() + .map_err(|_| { + anyhow!("component {:?} in path {} is not a directory", c, path) + })?; } else { bail!("path is not canonical: {}", path); } @@ -126,22 +126,6 @@ impl IsoFs { }) } - /// Returns the record for a specific directory. - pub fn get_dir(&mut self, path: &str) -> Result { - match self.get_path(path)? { - DirectoryRecord::Directory(d) => Ok(d), - DirectoryRecord::File(_) => Err(anyhow!("path {} is a file", path)), - } - } - - /// Returns the record for a specific file. - pub fn get_file(&mut self, path: &str) -> Result { - match self.get_path(path)? { - DirectoryRecord::Directory(_) => Err(anyhow!("path {} is a directory", path)), - DirectoryRecord::File(f) => Ok(f), - } - } - /// Returns the record for a specific name in a directory if it exists. pub fn get_dir_record( &mut self, @@ -212,6 +196,22 @@ pub enum DirectoryRecord { File(File), } +impl DirectoryRecord { + pub fn try_into_dir(self) -> Result { + match self { + Self::Directory(d) => Ok(d), + Self::File(f) => Err(anyhow!("entry {} is a file", f.name)), + } + } + + pub fn try_into_file(self) -> Result { + match self { + Self::Directory(f) => Err(anyhow!("entry {} is a directory", f.name)), + Self::File(f) => Ok(f), + } + } +} + #[derive(Debug, Serialize, Clone)] pub struct Directory { pub name: String, diff --git a/src/live.rs b/src/live.rs index f77b66c18..ed00f539a 100644 --- a/src/live.rs +++ b/src/live.rs @@ -677,7 +677,7 @@ pub fn iso_inspect(config: &IsoInspectConfig) -> Result<()> { pub fn iso_extract_pxe(config: &IsoExtractPxeConfig) -> Result<()> { let mut iso = IsoFs::from_file(open_live_iso(&config.input, None)?)?; - let pxeboot = iso.get_dir(COREOS_ISO_PXEBOOT_DIR)?; + let pxeboot = iso.get_path(COREOS_ISO_PXEBOOT_DIR)?.try_into_dir()?; std::fs::create_dir_all(&config.output_dir)?; let base = { From 691a43599c32eb968d0a82682499ecf21c7f3992 Mon Sep 17 00:00:00 2001 From: Benjamin Gilbert Date: Sat, 18 Sep 2021 02:09:02 -0400 Subject: [PATCH 4/7] iso9660: add NotFound error type Wrap get_path() ENOENT-type errors in a new error type so callers can detect this case and handle it separately if desired. --- Cargo.lock | 1 + Cargo.toml | 1 + src/iso9660.rs | 24 +++++++++++++++--------- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ed5b12892..8b6d26b8e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -186,6 +186,7 @@ dependencies = [ "serde_json", "structopt", "tempfile", + "thiserror", "url", "uuid", "walkdir", diff --git a/Cargo.toml b/Cargo.toml index 5feaf6726..51ef540e7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -58,6 +58,7 @@ serde = { version = "^1.0", features = ["derive"] } serde_json = "^1.0" structopt = "0.3" tempfile = ">= 3.1, < 4" +thiserror = "1.0" url = ">= 2.1, < 3.0" uuid = { version = "^0.8", features = ["v4"] } walkdir = "^2.3" diff --git a/src/iso9660.rs b/src/iso9660.rs index 9a51aa7e5..6433f9da1 100644 --- a/src/iso9660.rs +++ b/src/iso9660.rs @@ -107,10 +107,15 @@ impl IsoFs { let c = c.to_str().unwrap(); // `path` is &str dir = self .get_dir_record(&dir, c)? - .ok_or_else(|| anyhow!("intermediate directory {} does not exist", c))? + .ok_or_else(|| { + NotFound(format!("intermediate directory {} does not exist", c)) + })? .try_into_dir() .map_err(|_| { - anyhow!("component {:?} in path {} is not a directory", c, path) + NotFound(format!( + "component {:?} in path {} is not a directory", + c, path + )) })?; } else { bail!("path is not canonical: {}", path); @@ -118,20 +123,16 @@ impl IsoFs { } self.get_dir_record(&dir, filename)?.ok_or_else(|| { - anyhow!( + anyhow!(NotFound(format!( "no record for {} in directory {}", filename, parent_dir.display() - ) + ))) }) } /// Returns the record for a specific name in a directory if it exists. - pub fn get_dir_record( - &mut self, - dir: &Directory, - name: &str, - ) -> Result> { + fn get_dir_record(&mut self, dir: &Directory, name: &str) -> Result> { for record in self .list_dir(dir) .with_context(|| format!("listing directory {}", dir.name))? @@ -226,6 +227,11 @@ pub struct File { pub length: u32, } +/// Requested path was not found. +#[derive(Debug, thiserror::Error)] +#[error("{0}")] +pub struct NotFound(String); + /// Reads all the volume descriptors. fn get_volume_descriptors(f: &mut fs::File) -> Result> { const ISO9660_VOLUME_DESCRIPTORS: u64 = 0x10 * (ISO9660_SECTOR_SIZE as u64); From 897528ce7997427b57254f1678cb46a9580a2f10 Mon Sep 17 00:00:00 2001 From: Benjamin Gilbert Date: Mon, 20 Sep 2021 18:19:40 -0400 Subject: [PATCH 5/7] iso9660: explicitly loop in get_next_directory_record() There's already a case where we want to retry reading a directory record, and we're about to add a second case. Add an explicit loop rather than doing it ad-hoc. --- src/iso9660.rs | 83 +++++++++++++++++++++++++------------------------- 1 file changed, 42 insertions(+), 41 deletions(-) diff --git a/src/iso9660.rs b/src/iso9660.rs index 6433f9da1..0c93048fc 100644 --- a/src/iso9660.rs +++ b/src/iso9660.rs @@ -402,51 +402,52 @@ impl<'a> IsoFsWalkIterator<'a> { /// Reads the directory record at cursor and advances to the next one. fn get_next_directory_record(buf: &mut Bytes, length: u32) -> Result> { - if !buf.has_remaining() { - return Ok(None); - } - - let mut len = buf.get_u8() as usize; - if len == 0 { - let jump = { - // calculate where we are we in the directory - let pos = length as usize - buf.remaining(); - // get distance to next 2k-aligned address - ((pos + ISO9660_SECTOR_SIZE) & !(ISO9660_SECTOR_SIZE - 1)) - pos - }; - if jump >= buf.remaining() { + loop { + if !buf.has_remaining() { return Ok(None); } - buf.advance(jump); - len = buf.get_u8() as usize; - } - // + 1 because len includes the length of the length byte itself, which we already read - if buf.remaining() + 1 < len { - bail!("incomplete directory record; corrupt ISO?"); - } + let len = buf.get_u8() as usize; + if len == 0 { + let jump = { + // calculate where we are we in the directory + let pos = length as usize - buf.remaining(); + // get distance to next 2k-aligned address + ((pos + ISO9660_SECTOR_SIZE) & !(ISO9660_SECTOR_SIZE - 1)) - pos + }; + if jump >= buf.remaining() { + return Ok(None); + } + buf.advance(jump); + continue; + } else if len > buf.remaining() + 1 { + // + 1 because len includes the length of the length byte + // itself, which we already read + bail!("incomplete directory record; corrupt ISO?"); + } - let address = (eat(buf, 1).get_u32_le() as u64) * (ISO9660_SECTOR_SIZE as u64); - let length = eat(buf, 4).get_u32_le(); - let flags = eat(buf, 25 - 14).get_u8(); - let name_length = eat(buf, 32 - 26).get_u8() as usize; - let name = parse_iso9660_path(buf, name_length).context("parsing record name")?; - - // advance to next record - eat(buf, len - (33 + name_length)); - - if flags & 2 > 0 { - Ok(Some(DirectoryRecord::Directory(Directory { - name, - address, - length, - }))) - } else { - Ok(Some(DirectoryRecord::File(File { - name, - address, - length, - }))) + let address = (eat(buf, 1).get_u32_le() as u64) * (ISO9660_SECTOR_SIZE as u64); + let length = eat(buf, 4).get_u32_le(); + let flags = eat(buf, 25 - 14).get_u8(); + let name_length = eat(buf, 32 - 26).get_u8() as usize; + let name = parse_iso9660_path(buf, name_length).context("parsing record name")?; + + // advance to next record + eat(buf, len - (33 + name_length)); + + if flags & 2 > 0 { + return Ok(Some(DirectoryRecord::Directory(Directory { + name, + address, + length, + }))); + } else { + return Ok(Some(DirectoryRecord::File(File { + name, + address, + length, + }))); + } } } From bcb24958ae2f7ef126c05d4f57a3f90a58328c32 Mon Sep 17 00:00:00 2001 From: Benjamin Gilbert Date: Sat, 18 Sep 2021 18:54:03 -0400 Subject: [PATCH 6/7] iso9660: remove "." and ".." from directory lists We don't actually want or use them, so simplify the higher levels of abstraction by ignoring these when they're read. As a special case, continue supporting "." as the name of the root directory in the primary volume descriptor, since that's its name. --- src/iso9660.rs | 71 ++++++++++++++++++++++++++------------------------ 1 file changed, 37 insertions(+), 34 deletions(-) diff --git a/src/iso9660.rs b/src/iso9660.rs index 0c93048fc..de4dae674 100644 --- a/src/iso9660.rs +++ b/src/iso9660.rs @@ -291,7 +291,7 @@ impl PrimaryVolumeDescriptor { parse_iso9660_string(eat(buf, 1), 32, IsoString::StrA).context("parsing system id")?; let volume_id = // technically should be StrD, but non-compliance is common parse_iso9660_string(buf, 32, IsoString::StrA).context("parsing volume id")?; - let root = match get_next_directory_record(eat(buf, 156 - 72), 34)? { + let root = match get_next_directory_record(eat(buf, 156 - 72), 34, true)? { Some(DirectoryRecord::Directory(d)) => d, _ => bail!("failed to parse root directory record from primary descriptor"), }; @@ -345,7 +345,7 @@ impl IsoFsIterator { impl Iterator for IsoFsIterator { type Item = Result; fn next(&mut self) -> Option { - get_next_directory_record(&mut self.dir, self.length) + get_next_directory_record(&mut self.dir, self.length, false) .context("reading next record") .transpose() } @@ -376,9 +376,6 @@ impl<'a> IsoFsWalkIterator<'a> { let mut path = self.dirpath.clone(); match r { DirectoryRecord::Directory(ref d) => { - if d.name == "." || d.name == ".." { - continue; - } self.parent_dirs.push(self.current_dir.take().unwrap()); self.dirpath.push(&d.name); self.current_dir = Some(IsoFsIterator::new(self.iso, d)?); @@ -401,7 +398,11 @@ impl<'a> IsoFsWalkIterator<'a> { } /// Reads the directory record at cursor and advances to the next one. -fn get_next_directory_record(buf: &mut Bytes, length: u32) -> Result> { +fn get_next_directory_record( + buf: &mut Bytes, + length: u32, + is_root: bool, +) -> Result> { loop { if !buf.has_remaining() { return Ok(None); @@ -430,39 +431,41 @@ fn get_next_directory_record(buf: &mut Bytes, length: u32) -> Result 0 { - return Ok(Some(DirectoryRecord::Directory(Directory { - name, - address, - length, - }))); - } else { - return Ok(Some(DirectoryRecord::File(File { - name, - address, - length, - }))); - } - } -} - -/// Reads a directory record path. This is similar to a regular ISO9660 string, but supports '\0' -/// to mean current directory, and '\1' for the parent directory. -fn parse_iso9660_path(buf: &mut Bytes, len: usize) -> Result { - if len == 1 && (buf[0] == 0 || buf[0] == 1) { - let c = buf.get_u8(); - if c == 0 { - Ok(".".into()) - } else { - Ok("..".into()) + if let Some(name) = name { + if flags & 2 > 0 { + return Ok(Some(DirectoryRecord::Directory(Directory { + name, + address, + length, + }))); + } else { + return Ok(Some(DirectoryRecord::File(File { + name, + address, + length, + }))); + } } - } else { - parse_iso9660_string(buf, len, IsoString::File) } } From 8f84ea4936c3ef6359be5c7912c41240542bebc1 Mon Sep 17 00:00:00 2001 From: Benjamin Gilbert Date: Mon, 20 Sep 2021 20:48:09 -0400 Subject: [PATCH 7/7] iso9660: canonicalize path early in get_path() Remove the non-Normal elements first to make the rest of the code easier to reason about. --- src/iso9660.rs | 93 +++++++++++++++++++++++++++++++------------------- 1 file changed, 57 insertions(+), 36 deletions(-) diff --git a/src/iso9660.rs b/src/iso9660.rs index de4dae674..1b0571337 100644 --- a/src/iso9660.rs +++ b/src/iso9660.rs @@ -84,49 +84,31 @@ impl IsoFs { /// Returns the record for a specific path. pub fn get_path(&mut self, path: &str) -> Result { - let root_dir = self.get_root_directory()?; - let as_path = Path::new(path); - let mut parent_dir = if let Some(p) = as_path.parent() { - p - } else { - return Ok(DirectoryRecord::Directory(root_dir)); + let mut dir = self.get_root_directory()?; + let mut components = path_components(path); + let filename = match components.pop() { + Some(f) => f, + None => return Ok(DirectoryRecord::Directory(dir)), }; - if parent_dir.has_root() { - parent_dir = parent_dir - .strip_prefix("/") - .with_context(|| format!("making path '{}' relative", path))?; - } - let filename = as_path - .file_name() - .ok_or_else(|| anyhow!("path {} has no base", path))?; - let filename = filename.to_str().unwrap(); // `path` is &str - - let mut dir = root_dir; - for component in parent_dir.components() { - if let std::path::Component::Normal(c) = component { - let c = c.to_str().unwrap(); // `path` is &str - dir = self - .get_dir_record(&dir, c)? - .ok_or_else(|| { - NotFound(format!("intermediate directory {} does not exist", c)) - })? - .try_into_dir() - .map_err(|_| { - NotFound(format!( - "component {:?} in path {} is not a directory", - c, path - )) - })?; - } else { - bail!("path is not canonical: {}", path); - } + + for c in &components { + dir = self + .get_dir_record(&dir, c)? + .ok_or_else(|| NotFound(format!("intermediate directory {} does not exist", c)))? + .try_into_dir() + .map_err(|_| { + NotFound(format!( + "component {:?} in path {} is not a directory", + c, path + )) + })?; } self.get_dir_record(&dir, filename)?.ok_or_else(|| { anyhow!(NotFound(format!( "no record for {} in directory {}", filename, - parent_dir.display() + components.join("/") ))) }) } @@ -522,3 +504,42 @@ fn eat(buf: &mut Bytes, n: usize) -> &mut Bytes { buf.advance(n); buf } + +/// Parse path into a Vec<&str> with zero or more components. Convert path +/// to relative and resolve all "." and ".." components. +fn path_components(s: &str) -> Vec<&str> { + // empty paths are treated the same as "/" to allow round-tripping + use std::path::Component::*; + let mut ret = Vec::new(); + for c in Path::new(s).components() { + match c { + Prefix(_) | RootDir | CurDir => (), + ParentDir => { + ret.pop(); + } + Normal(c) => { + ret.push(c.to_str().unwrap()); // `s` is &str + } + } + } + ret +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_path_components() { + // basic + assert_eq!(path_components("z"), vec!["z"]); + // absolute path with . and .. + assert_eq!(path_components("/a/./../b"), vec!["b"]); + // relative path, traversal past root + assert_eq!(path_components("./a/../../b"), vec!["b"]); + // just the root + assert_eq!(path_components("/"), Vec::new() as Vec<&str>); + // empty string + assert_eq!(path_components(""), Vec::new() as Vec<&str>); + } +}