diff --git a/src/builder.rs b/src/builder.rs index 0f4ef8ff..3c943591 100644 --- a/src/builder.rs +++ b/src/builder.rs @@ -4,7 +4,9 @@ use std::io::prelude::*; use std::path::Path; use std::str; +use crate::header::GNU_SPARSE_HEADERS_COUNT; use crate::header::{path2bytes, HeaderMode}; +use crate::GnuExtSparseHeader; use crate::{other, EntryType, Header}; /// A structure for building archives @@ -12,20 +14,29 @@ use crate::{other, EntryType, Header}; /// This structure has methods for building up an archive from scratch into any /// arbitrary writer. pub struct Builder { - mode: HeaderMode, - follow: bool, + options: BuilderOptions, finished: bool, obj: Option, } +#[derive(Clone, Copy)] +struct BuilderOptions { + mode: HeaderMode, + follow: bool, + sparse: bool, +} + impl Builder { /// Create a new archive builder with the underlying object as the /// destination of all data written. The builder will use /// `HeaderMode::Complete` by default. pub fn new(obj: W) -> Builder { Builder { - mode: HeaderMode::Complete, - follow: true, + options: BuilderOptions { + mode: HeaderMode::Complete, + follow: true, + sparse: true, + }, finished: false, obj: Some(obj), } @@ -35,7 +46,7 @@ impl Builder { /// methods that implicitly read metadata for an input Path. Notably, this /// does _not_ apply to `append(Header)`. pub fn mode(&mut self, mode: HeaderMode) { - self.mode = mode; + self.options.mode = mode; } /// Follow symlinks, archiving the contents of the file they point to rather @@ -44,7 +55,14 @@ impl Builder { /// When true, it exhibits the same behavior as GNU `tar` command's /// `--dereference` or `-h` options . pub fn follow_symlinks(&mut self, follow: bool) { - self.follow = follow; + self.options.follow = follow; + } + + /// Handle sparse files efficiently, if supported by the underlying + /// filesystem. When true, sparse file information is read from disk and + /// empty segments are omitted from the archive. Defaults to true. + pub fn sparse(&mut self, sparse: bool) { + self.options.sparse = sparse; } /// Gets shared reference to the underlying object. @@ -279,9 +297,8 @@ impl Builder { /// ar.append_path("foo/bar.txt").unwrap(); /// ``` pub fn append_path>(&mut self, path: P) -> io::Result<()> { - let mode = self.mode.clone(); - let follow = self.follow; - append_path_with_name(self.get_mut(), path.as_ref(), None, mode, follow) + let options = self.options; + append_path_with_name(self.get_mut(), path.as_ref(), None, options) } /// Adds a file on the local filesystem to this archive under another name. @@ -317,15 +334,8 @@ impl Builder { path: P, name: N, ) -> io::Result<()> { - let mode = self.mode.clone(); - let follow = self.follow; - append_path_with_name( - self.get_mut(), - path.as_ref(), - Some(name.as_ref()), - mode, - follow, - ) + let options = self.options; + append_path_with_name(self.get_mut(), path.as_ref(), Some(name.as_ref()), options) } /// Adds a file to this archive with the given path as the name of the file @@ -355,8 +365,8 @@ impl Builder { /// ar.append_file("bar/baz.txt", &mut f).unwrap(); /// ``` pub fn append_file>(&mut self, path: P, file: &mut fs::File) -> io::Result<()> { - let mode = self.mode.clone(); - append_file(self.get_mut(), path.as_ref(), file, mode) + let options = self.options; + append_file(self.get_mut(), path.as_ref(), file, options) } /// Adds a directory to this archive with the given path as the name of the @@ -392,8 +402,8 @@ impl Builder { P: AsRef, Q: AsRef, { - let mode = self.mode.clone(); - append_dir(self.get_mut(), path.as_ref(), src_path.as_ref(), mode) + let options = self.options; + append_dir(self.get_mut(), path.as_ref(), src_path.as_ref(), options) } /// Adds a directory and all of its contents (recursively) to this archive @@ -454,15 +464,8 @@ impl Builder { P: AsRef, Q: AsRef, { - let mode = self.mode.clone(); - let follow = self.follow; - append_dir_all( - self.get_mut(), - path.as_ref(), - src_path.as_ref(), - mode, - follow, - ) + let options = self.options; + append_dir_all(self.get_mut(), path.as_ref(), src_path.as_ref(), options) } /// Finish writing this archive, emitting the termination sections. @@ -570,14 +573,16 @@ impl Drop for EntryWriter<'_> { fn append(mut dst: &mut dyn Write, header: &Header, mut data: &mut dyn Read) -> io::Result<()> { dst.write_all(header.as_bytes())?; let len = io::copy(&mut data, &mut dst)?; + pad_zeroes(&mut dst, len)?; + Ok(()) +} - // Pad with zeros if necessary. +fn pad_zeroes(dst: &mut dyn Write, len: u64) -> io::Result<()> { let buf = [0; 512]; let remaining = 512 - (len % 512); if remaining < 512 { dst.write_all(&buf[..remaining as usize])?; } - Ok(()) } @@ -585,10 +590,9 @@ fn append_path_with_name( dst: &mut dyn Write, path: &Path, name: Option<&Path>, - mode: HeaderMode, - follow: bool, + options: BuilderOptions, ) -> io::Result<()> { - let stat = if follow { + let stat = if options.follow { fs::metadata(path).map_err(|err| { io::Error::new( err.kind(), @@ -605,23 +609,16 @@ fn append_path_with_name( }; let ar_name = name.unwrap_or(path); if stat.is_file() { - append_fs(dst, ar_name, &stat, &mut fs::File::open(path)?, mode, None) + append_file(dst, ar_name, &mut fs::File::open(path)?, options) } else if stat.is_dir() { - append_fs(dst, ar_name, &stat, &mut io::empty(), mode, None) + append_fs(dst, ar_name, &stat, options.mode, None) } else if stat.file_type().is_symlink() { let link_name = fs::read_link(path)?; - append_fs( - dst, - ar_name, - &stat, - &mut io::empty(), - mode, - Some(&link_name), - ) + append_fs(dst, ar_name, &stat, options.mode, Some(&link_name)) } else { #[cfg(unix)] { - append_special(dst, path, &stat, mode) + append_special(dst, path, &stat, options.mode) } #[cfg(not(unix))] { @@ -678,20 +675,44 @@ fn append_file( dst: &mut dyn Write, path: &Path, file: &mut fs::File, - mode: HeaderMode, + options: BuilderOptions, ) -> io::Result<()> { let stat = file.metadata()?; - append_fs(dst, path, &stat, file, mode, None) + let mut header = Header::new_gnu(); + + prepare_header_path(dst, &mut header, path)?; + header.set_metadata_in_mode(&stat, options.mode); + let sparse_entries = if options.sparse { + prepare_header_sparse(file, &stat, &mut header)? + } else { + None + }; + header.set_cksum(); + dst.write_all(header.as_bytes())?; + + if let Some(sparse_entries) = sparse_entries { + append_extended_sparse_headers(dst, &sparse_entries)?; + for entry in sparse_entries.entries { + file.seek(io::SeekFrom::Start(entry.offset))?; + io::copy(&mut file.take(entry.num_bytes), dst)?; + } + pad_zeroes(dst, sparse_entries.on_disk_size)?; + } else { + let len = io::copy(file, dst)?; + pad_zeroes(dst, len)?; + } + + Ok(()) } fn append_dir( dst: &mut dyn Write, path: &Path, src_path: &Path, - mode: HeaderMode, + options: BuilderOptions, ) -> io::Result<()> { let stat = fs::metadata(src_path)?; - append_fs(dst, path, &stat, &mut io::empty(), mode, None) + append_fs(dst, path, &stat, options.mode, None) } fn prepare_header(size: u64, entry_type: u8) -> Header { @@ -759,11 +780,67 @@ fn prepare_header_link( Ok(()) } +fn prepare_header_sparse( + file: &mut fs::File, + stat: &fs::Metadata, + header: &mut Header, +) -> io::Result> { + let entries = match find_sparse_entries(file, stat)? { + Some(entries) => entries, + _ => return Ok(None), + }; + + header.set_entry_type(EntryType::GNUSparse); + header.set_size(entries.on_disk_size); + + // Write the first 4 (GNU_SPARSE_HEADERS_COUNT) entries to the given header. + // The remaining entries will be written as subsequent extended headers. See + // https://www.gnu.org/software/tar/manual/html_section/Sparse-Formats.html#Old-GNU-Format + // for details on the format. + let gnu_header = &mut header.as_gnu_mut().unwrap(); + gnu_header.set_real_size(entries.size()); + + for (entry, header_entry) in std::iter::zip(&entries.entries, &mut gnu_header.sparse) { + header_entry.set_offset(entry.offset); + header_entry.set_length(entry.num_bytes); + } + gnu_header.set_is_extended(entries.entries.len() > gnu_header.sparse.len()); + + Ok(Some(entries)) +} + +/// Write extra sparse headers into `dst` for those entries that did not fit in the main header. +fn append_extended_sparse_headers(dst: &mut dyn Write, entries: &SparseEntries) -> io::Result<()> { + // The first `GNU_SPARSE_HEADERS_COUNT` entries are written to the main header, so skip them. + let mut it = entries + .entries + .iter() + .skip(GNU_SPARSE_HEADERS_COUNT) + .peekable(); + + // Each GnuExtSparseHeader can hold up to fixed number of sparse entries (21). + // So we pack entries into multiple headers if necessary. + while it.peek().is_some() { + let mut ext_header = GnuExtSparseHeader::new(); + for header_entry in ext_header.sparse.iter_mut() { + if let Some(entry) = it.next() { + header_entry.set_offset(entry.offset); + header_entry.set_length(entry.num_bytes); + } else { + break; + } + } + ext_header.set_is_extended(it.peek().is_some()); + dst.write_all(ext_header.as_bytes())?; + } + + Ok(()) +} + fn append_fs( dst: &mut dyn Write, path: &Path, meta: &fs::Metadata, - read: &mut dyn Read, mode: HeaderMode, link_name: Option<&Path>, ) -> io::Result<()> { @@ -775,50 +852,455 @@ fn append_fs( prepare_header_link(dst, &mut header, link_name)?; } header.set_cksum(); - append(dst, &header, read) + dst.write_all(header.as_bytes()) } fn append_dir_all( dst: &mut dyn Write, path: &Path, src_path: &Path, - mode: HeaderMode, - follow: bool, + options: BuilderOptions, ) -> io::Result<()> { let mut stack = vec![(src_path.to_path_buf(), true, false)]; while let Some((src, is_dir, is_symlink)) = stack.pop() { let dest = path.join(src.strip_prefix(&src_path).unwrap()); // In case of a symlink pointing to a directory, is_dir is false, but src.is_dir() will return true - if is_dir || (is_symlink && follow && src.is_dir()) { + if is_dir || (is_symlink && options.follow && src.is_dir()) { for entry in fs::read_dir(&src)? { let entry = entry?; let file_type = entry.file_type()?; stack.push((entry.path(), file_type.is_dir(), file_type.is_symlink())); } if dest != Path::new("") { - append_dir(dst, &dest, &src, mode)?; + append_dir(dst, &dest, &src, options)?; } - } else if !follow && is_symlink { + } else if !options.follow && is_symlink { let stat = fs::symlink_metadata(&src)?; let link_name = fs::read_link(&src)?; - append_fs(dst, &dest, &stat, &mut io::empty(), mode, Some(&link_name))?; + append_fs(dst, &dest, &stat, options.mode, Some(&link_name))?; } else { #[cfg(unix)] { let stat = fs::metadata(&src)?; if !stat.is_file() { - append_special(dst, &dest, &stat, mode)?; + append_special(dst, &dest, &stat, options.mode)?; continue; } } - append_file(dst, &dest, &mut fs::File::open(src)?, mode)?; + append_file(dst, &dest, &mut fs::File::open(src)?, options)?; } } Ok(()) } +#[derive(Debug, Clone, PartialEq, Eq)] +struct SparseEntries { + entries: Vec, + on_disk_size: u64, +} + +impl SparseEntries { + fn size(&self) -> u64 { + self.entries.last().map_or(0, |e| e.offset + e.num_bytes) + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +struct SparseEntry { + offset: u64, + num_bytes: u64, +} + +/// Find sparse entries in a file. Returns: +/// * `Ok(Some(_))` if the file is sparse. +/// * `Ok(None)` if the file is not sparse, or if the file system does not +/// support sparse files. +/// * `Err(_)` if an error occurred. The lack of support for sparse files is not +/// considered an error. It might return an error if the file is modified +/// while reading. +fn find_sparse_entries( + file: &mut fs::File, + stat: &fs::Metadata, +) -> io::Result> { + #[cfg(not(any(target_os = "android", target_os = "freebsd", target_os = "linux")))] + { + let _ = file; + let _ = stat; + Ok(None) + } + + #[cfg(any(target_os = "android", target_os = "freebsd", target_os = "linux"))] + find_sparse_entries_seek(file, stat) +} + +/// Implementation of `find_sparse_entries` using `SEEK_HOLE` and `SEEK_DATA`. +#[cfg(any(target_os = "android", target_os = "freebsd", target_os = "linux"))] +fn find_sparse_entries_seek( + file: &mut fs::File, + stat: &fs::Metadata, +) -> io::Result> { + use std::os::unix::fs::MetadataExt as _; + use std::os::unix::io::AsRawFd as _; + + fn lseek(file: &fs::File, offset: i64, whence: libc::c_int) -> Result { + #[cfg(any(target_os = "linux", target_os = "android"))] + let lseek = libc::lseek64; + #[cfg(not(any(target_os = "linux", target_os = "android")))] + let lseek = libc::lseek; + + match unsafe { lseek(file.as_raw_fd(), offset, whence) } { + -1 => Err(io::Error::last_os_error().raw_os_error().unwrap()), + off => Ok(off), + } + } + + if stat.blocks() == 0 { + return Ok(if stat.size() == 0 { + // Empty file. + None + } else { + // Fully sparse file. + Some(SparseEntries { + entries: vec![SparseEntry { + offset: stat.size(), + num_bytes: 0, + }], + on_disk_size: 0, + }) + }); + } + + // On most Unices, we need to read `_PC_MIN_HOLE_SIZE` to see if the file + // system supports `SEEK_HOLE`. + // FreeBSD: https://man.freebsd.org/cgi/man.cgi?query=lseek&sektion=2&manpath=FreeBSD+14.1-STABLE + #[cfg(not(any(target_os = "linux", target_os = "android")))] + if unsafe { libc::fpathconf(file.as_raw_fd(), libc::_PC_MIN_HOLE_SIZE) } == -1 { + return Ok(None); + } + + // Linux is the only UNIX-like without support for `_PC_MIN_HOLE_SIZE`, so + // instead we try to call `lseek` and see if it fails. + #[cfg(any(target_os = "linux", target_os = "android"))] + match lseek(file, 0, libc::SEEK_HOLE) { + Ok(_) => (), + Err(libc::ENXIO) => { + // The file is empty. Treat it as non-sparse. + return Ok(None); + } + Err(_) => return Ok(None), + } + + let mut entries = Vec::new(); + let mut on_disk_size = 0; + let mut off_s = 0; + loop { + // off_s=0 │ off_s │ off_s + // ↓ │ ↓ │ ↓ + // | DATA |… │ ……………| HOLE | DATA |… │ …|×EOF× + // ↑ │ ↑ ↑ │ + // (a) │ (b) (c) (d) │ (e) + match lseek(file, off_s, libc::SEEK_DATA) { + Ok(0) if off_s == 0 => (), // (a) The file starts with data. + Ok(off) if off < off_s => { + // (b) Unlikely. + return Err(std::io::Error::new( + io::ErrorKind::Other, + "lseek(SEEK_DATA) went backwards", + )); + } + Ok(off) if off == off_s => { + // (c) The data at the same offset as the hole. + return Err(std::io::Error::new( + io::ErrorKind::Other, + "lseek(SEEK_DATA) did not advance. \ + Did the file change while appending?", + )); + } + Ok(off) => off_s = off, // (d) Jump to the next hole. + Err(libc::ENXIO) => break, // (e) Reached the end of the file. + Err(errno) => return Err(io::Error::from_raw_os_error(errno)), + }; + + // off_s=0 │ off_s │ off_s + // ↓ │ ↓ │ ↓ + // | DATA |×EOF× │ ……………| DATA | HOLE |… │ …|×EOF× + // ↑ │ ↑ ↑ │ + // (a) │ (b) (c) (d) │ (e) + match lseek(file, off_s, libc::SEEK_HOLE) { + Ok(off_e) if off_s == 0 && (off_e as u64) == stat.size() => { + // (a) The file is not sparse. + file.seek(io::SeekFrom::Start(0))?; + return Ok(None); + } + Ok(off_e) if off_e < off_s => { + // (b) Unlikely. + return Err(std::io::Error::new( + io::ErrorKind::Other, + "lseek(SEEK_HOLE) went backwards", + )); + } + Ok(off_e) if off_e == off_s => { + // (c) The hole at the same offset as the data. + return Err(std::io::Error::new( + io::ErrorKind::Other, + "lseek(SEEK_HOLE) did not advance. \ + Did the file change while appending?", + )); + } + Ok(off_e) => { + // (d) Found a hole or reached the end of the file (implicit + // zero-length hole). + entries.push(SparseEntry { + offset: off_s as u64, + num_bytes: off_e as u64 - off_s as u64, + }); + on_disk_size += off_e as u64 - off_s as u64; + off_s = off_e; + } + Err(libc::ENXIO) => { + // (e) off_s was already beyond the end of the file. + return Err(std::io::Error::new( + io::ErrorKind::Other, + "lseek(SEEK_HOLE) returned ENXIO. \ + Did the file change while appending?", + )); + } + Err(errno) => return Err(io::Error::from_raw_os_error(errno)), + }; + } + + if off_s as u64 > stat.size() { + return Err(std::io::Error::new( + io::ErrorKind::Other, + "lseek(SEEK_DATA) went beyond the end of the file. \ + Did the file change while appending?", + )); + } + + // Add a final zero-length entry. It is required if the file ends with a + // hole, and redundant otherwise. However, we add it unconditionally to + // mimic GNU tar behavior. + entries.push(SparseEntry { + offset: stat.size(), + num_bytes: 0, + }); + + file.seek(io::SeekFrom::Start(0))?; + + Ok(Some(SparseEntries { + entries, + on_disk_size, + })) +} + impl Drop for Builder { fn drop(&mut self) { let _ = self.finish(); } } + +#[cfg(test)] +mod tests { + use super::*; + + /// Should be multiple of 4KiB on ext4, multiple of 32KiB on FreeBSD/UFS. + const SPARSE_BLOCK_SIZE: u64 = 32768; + + #[test] + fn test_find_sparse_entries() { + let cases: &[(&str, &[SparseEntry])] = &[ + ("|", &[]), + ( + "| | | | |", + &[SparseEntry { + offset: 4 * SPARSE_BLOCK_SIZE, + num_bytes: 0, + }], + ), + ( + "|####|####|####|####|", + &[ + SparseEntry { + offset: 0, + num_bytes: 4 * SPARSE_BLOCK_SIZE, + }, + SparseEntry { + offset: 4 * SPARSE_BLOCK_SIZE, + num_bytes: 0, + }, + ], + ), + ( + "|####|####| | |", + &[ + SparseEntry { + offset: 0, + num_bytes: 2 * SPARSE_BLOCK_SIZE, + }, + SparseEntry { + offset: 4 * SPARSE_BLOCK_SIZE, + num_bytes: 0, + }, + ], + ), + ( + "| | |####|####|", + &[ + SparseEntry { + offset: 2 * SPARSE_BLOCK_SIZE, + num_bytes: 2 * SPARSE_BLOCK_SIZE, + }, + SparseEntry { + offset: 4 * SPARSE_BLOCK_SIZE, + num_bytes: 0, + }, + ], + ), + ( + "|####| |####| |", + &[ + SparseEntry { + offset: 0, + num_bytes: SPARSE_BLOCK_SIZE, + }, + SparseEntry { + offset: 2 * SPARSE_BLOCK_SIZE, + num_bytes: SPARSE_BLOCK_SIZE, + }, + SparseEntry { + offset: 4 * SPARSE_BLOCK_SIZE, + num_bytes: 0, + }, + ], + ), + ( + "|####| | |####|", + &[ + SparseEntry { + offset: 0, + num_bytes: SPARSE_BLOCK_SIZE, + }, + SparseEntry { + offset: 3 * SPARSE_BLOCK_SIZE, + num_bytes: SPARSE_BLOCK_SIZE, + }, + SparseEntry { + offset: 4 * SPARSE_BLOCK_SIZE, + num_bytes: 0, + }, + ], + ), + ( + "| |####|####| |", + &[ + SparseEntry { + offset: 1 * SPARSE_BLOCK_SIZE, + num_bytes: 2 * SPARSE_BLOCK_SIZE, + }, + SparseEntry { + offset: 4 * SPARSE_BLOCK_SIZE, + num_bytes: 0, + }, + ], + ), + ]; + + let mut file = tempfile::tempfile().unwrap(); + + for &(description, map) in cases { + file.set_len(0).unwrap(); + file.set_len(map.last().map_or(0, |e| e.offset + e.num_bytes)) + .unwrap(); + + for e in map { + file.seek(io::SeekFrom::Start(e.offset)).unwrap(); + for _ in 0..e.num_bytes / SPARSE_BLOCK_SIZE { + file.write_all(&[0xFF; SPARSE_BLOCK_SIZE as usize]).unwrap(); + } + } + + let expected = match map { + // Empty file. + &[] => None, + + // 100% dense. + &[SparseEntry { + offset: 0, + num_bytes: x1, + }, SparseEntry { + offset: x2, + num_bytes: 0, + }] if x1 == x2 => None, + + // Sparse. + map => Some(SparseEntries { + entries: map.to_vec(), + on_disk_size: map.iter().map(|e| e.num_bytes).sum(), + }), + }; + + let stat = file.metadata().unwrap(); + let reported = find_sparse_entries(&mut file, &stat).unwrap(); + + // Loose check: we did not miss any data blocks. + if let Err(e) = loose_check_sparse_entries(reported.as_ref(), expected.as_ref()) { + panic!( + "Case: {description}\n\ + Reported: {reported:?}\n\ + Expected: {expected:?}\n\ + Error: {e}", + ); + } + + // On Linux, always do a strict check. Skip on FreeBSD, as on UFS + // the last block is always dense, even if it's zero-filled. + #[cfg(any(target_os = "android", target_os = "linux"))] + assert_eq!(reported, expected, "Case: {description}"); + } + } + + fn loose_check_sparse_entries( + reported: Option<&SparseEntries>, + expected: Option<&SparseEntries>, + ) -> Result<(), &'static str> { + let reported = match reported { + Some(entries) => entries, // Reported as sparse. + // It's not an error to report a sparse file as non-sparse. + None => return Ok(()), + }; + let expected = match expected { + Some(entries) => entries, + None => return Err("Expected dense file, but reported as sparse"), + }; + + // Check that we didn't miss any data blocks. However, reporting some + // holes as data is not an error during the loose check. + if expected.entries.iter().any(|e| { + !reported + .entries + .iter() + .any(|r| e.offset >= r.offset && e.offset + e.num_bytes <= r.offset + r.num_bytes) + }) { + return Err("Reported is not a superset of expected"); + } + + if reported.entries.last() != expected.entries.last() { + return Err("Last zero-length entry is not as expected"); + } + + // Check invariants of SparseEntries. + let mut prev_end = None; + for e in &reported.entries[..reported.entries.len()] { + if prev_end.map_or(false, |p| e.offset < p) { + return Err("Overlapping or unsorted entries"); + } + prev_end = Some(e.offset + e.num_bytes); + } + + if reported.on_disk_size != reported.entries.iter().map(|e| e.num_bytes).sum() { + return Err("Incorrect on-disk size"); + } + + Ok(()) + } +} diff --git a/src/header.rs b/src/header.rs index 36fb52ae..8e39ab63 100644 --- a/src/header.rs +++ b/src/header.rs @@ -24,6 +24,10 @@ use crate::EntryType; #[cfg(any(unix, windows))] const DETERMINISTIC_TIMESTAMP: u64 = 1153704088; +pub(crate) const GNU_SPARSE_HEADERS_COUNT: usize = 4; + +pub(crate) const GNU_EXT_SPARSE_HEADERS_COUNT: usize = 21; + /// Representation of the header of an entry in an archive #[repr(C)] #[allow(missing_docs)] @@ -112,7 +116,7 @@ pub struct GnuHeader { pub offset: [u8; 12], pub longnames: [u8; 4], pub unused: [u8; 1], - pub sparse: [GnuSparseHeader; 4], + pub sparse: [GnuSparseHeader; GNU_SPARSE_HEADERS_COUNT], pub isextended: [u8; 1], pub realsize: [u8; 12], pub pad: [u8; 17], @@ -135,7 +139,7 @@ pub struct GnuSparseHeader { #[repr(C)] #[allow(missing_docs)] pub struct GnuExtSparseHeader { - pub sparse: [GnuSparseHeader; 21], + pub sparse: [GnuSparseHeader; GNU_EXT_SPARSE_HEADERS_COUNT], pub isextended: [u8; 1], pub padding: [u8; 7], } @@ -1262,6 +1266,11 @@ impl GnuHeader { }) } + /// Encodes the `real_size` provided into this header. + pub fn set_real_size(&mut self, real_size: u64) { + num_field_wrapper_into(&mut self.realsize, real_size); + } + /// Indicates whether this header will be followed by additional /// sparse-header records. /// @@ -1271,6 +1280,15 @@ impl GnuHeader { self.isextended[0] == 1 } + /// Sets whether this header should be followed by additional sparse-header + /// records. + /// + /// To append a sparse [`std::fs::File`] to an archive, prefer using the + /// [`crate::Builder`] instead. + pub fn set_is_extended(&mut self, is_extended: bool) { + self.isextended[0] = if is_extended { 1 } else { 0 }; + } + /// Views this as a normal `Header` pub fn as_header(&self) -> &Header { unsafe { cast(self) } @@ -1330,6 +1348,11 @@ impl GnuSparseHeader { }) } + /// Encodes the `offset` provided into this header. + pub fn set_offset(&mut self, offset: u64) { + num_field_wrapper_into(&mut self.offset, offset); + } + /// Length of the block /// /// Returns `Err` for a malformed `numbytes` field. @@ -1341,6 +1364,11 @@ impl GnuSparseHeader { ) }) } + + /// Encodes the `length` provided into this header. + pub fn set_length(&mut self, length: u64) { + num_field_wrapper_into(&mut self.numbytes, length); + } } impl fmt::Debug for GnuSparseHeader { @@ -1382,10 +1410,20 @@ impl GnuExtSparseHeader { &self.sparse } + /// Same as `sparse` but mutable version. + pub fn sparse_mut(&mut self) -> &mut [GnuSparseHeader; 21] { + &mut self.sparse + } + /// Indicates if another sparse header should be following this one. pub fn is_extended(&self) -> bool { self.isextended[0] == 1 } + + /// Sets whether another sparse header should be following this one. + pub fn set_is_extended(&mut self, is_extended: bool) { + self.isextended[0] = if is_extended { 1 } else { 0 }; + } } impl Default for GnuExtSparseHeader { diff --git a/tests/all.rs b/tests/all.rs index 5ac28ce5..eff0e4da 100644 --- a/tests/all.rs +++ b/tests/all.rs @@ -1222,6 +1222,67 @@ fn sparse_with_trailing() { assert_eq!(&s[0x100_000..], "1MB through\n"); } +#[test] +fn writing_sparse() { + let mut ar = Builder::new(Vec::new()); + let td = t!(TempBuilder::new().prefix("tar-rs").tempdir()); + + let mut files = Vec::new(); + let mut append_file = |name: &str, chunks: &[(u64, u64)]| { + let path = td.path().join(name); + let mut file = t!(File::create(&path)); + t!(file.set_len( + chunks + .iter() + .map(|&(off, len)| off + len) + .max() + .unwrap_or(0), + )); + for (i, &(off, len)) in chunks.iter().enumerate() { + t!(file.seek(io::SeekFrom::Start(off))); + let mut data = vec![i as u8 + b'a'; len as usize]; + data.first_mut().map(|x| *x = b'['); + data.last_mut().map(|x| *x = b']'); + t!(file.write_all(&data)); + } + t!(ar.append_path_with_name(&path, path.file_name().unwrap())); + files.push(path); + }; + + append_file("empty", &[]); + append_file("full_sparse", &[(0x20_000, 0)]); + append_file("_x", &[(0x20_000, 0x1_000)]); + append_file("x_", &[(0, 0x1_000), (0x20_000, 0)]); + append_file("_x_x", &[(0x20_000, 0x1_000), (0x40_000, 0x1_000)]); + append_file("x_x_", &[(0, 0x1_000), (0x20_000, 0x1_000), (0x40_000, 0)]); + append_file("uneven", &[(0x20_333, 0x555), (0x40_777, 0x999)]); + + t!(ar.finish()); + + let data = t!(ar.into_inner()); + + // Without sparse support, the size of the tarball exceed 1MiB. + #[cfg(target_os = "linux")] + assert!(data.len() <= 37 * 1024); // ext4 (defaults to 4k block size) + #[cfg(target_os = "freebsd")] + assert!(data.len() <= 273 * 1024); // UFS (defaults to 32k block size, last block isn't a hole) + + let mut ar = Archive::new(&data[..]); + let mut entries = t!(ar.entries()); + for path in files { + let mut f = t!(entries.next().unwrap()); + + let mut s = String::new(); + t!(f.read_to_string(&mut s)); + + let expected = t!(fs::read_to_string(&path)); + + assert!(s == expected, "path: {path:?}"); + } + + assert!(entries.next().is_none()); +} + #[test] fn path_separators() { let mut ar = Builder::new(Vec::new());