From e9638d25b029def77137eaaba0020d7845fd7e0f Mon Sep 17 00:00:00 2001 From: Andrew Gunnerson Date: Sun, 1 Sep 2024 01:39:18 -0400 Subject: [PATCH] Add support for packing and unpacking Android sparse images This supports all features of Android sparse images, including holes, and CRC32 (both full image checksum and CRC32 chunks). Partial sparse images, like those included in GrapheneOS' new optimized factory images, can also be packed and unpacked with these new commands, unlike AOSP's simg2img and img2simg tools. This new functionality is not relevant for avbroot's main use case, but is useful for unpacking certain factory images for comparison with OTAs during troubleshooting. Signed-off-by: Andrew Gunnerson --- Cargo.lock | 46 ++ README.extra.md | 46 +- avbroot/Cargo.toml | 2 + avbroot/src/cli/args.rs | 4 +- avbroot/src/cli/mod.rs | 1 + avbroot/src/cli/sparse.rs | 632 ++++++++++++++++++ avbroot/src/format/lp.rs | 8 +- avbroot/src/format/mod.rs | 1 + avbroot/src/format/sparse.rs | 1211 ++++++++++++++++++++++++++++++++++ avbroot/tests/sparse.rs | 173 +++++ deny.toml | 1 + fuzz/src/bin/sparse.rs | 27 + 12 files changed, 2146 insertions(+), 6 deletions(-) create mode 100644 avbroot/src/cli/sparse.rs create mode 100644 avbroot/src/format/sparse.rs create mode 100644 avbroot/tests/sparse.rs create mode 100644 fuzz/src/bin/sparse.rs diff --git a/Cargo.lock b/Cargo.lock index 3072181..61e0be2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -131,7 +131,9 @@ dependencies = [ "cms", "const-oid", "constcat", + "crc32fast", "ctrlc", + "dlv-list", "flate2", "gf256", "hex", @@ -412,6 +414,26 @@ version = "0.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" +[[package]] +name = "const-random" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +dependencies = [ + 
"const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom", + "once_cell", + "tiny-keccak", +] + [[package]] name = "constcat" version = "0.5.0" @@ -461,6 +483,12 @@ version = "0.8.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + [[package]] name = "crypto-common" version = "0.1.6" @@ -552,6 +580,15 @@ dependencies = [ "subtle", ] +[[package]] +name = "dlv-list" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "442039f5147480ba31067cb00ada1adae6892028e40e45fc5de7b7df6dcc1b5f" +dependencies = [ + "const-random", +] + [[package]] name = "e2e" version = "3.6.0" @@ -1694,6 +1731,15 @@ dependencies = [ "once_cell", ] +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + [[package]] name = "tls_codec" version = "0.4.1" diff --git a/README.extra.md b/README.extra.md index d6617ac..7807c9e 100644 --- a/README.extra.md +++ b/README.extra.md @@ -286,7 +286,7 @@ All metadata slots in the newly packed LP image will be identical. ### Repacking an LP image ```bash -avbroot lp repack [-i ] [-i ]... -o [-o ]... +avbroot lp repack -i [-i ]... -o [-o ]... ``` This subcommand is logically equivalent to `avbroot lp unpack` followed by `avbroot lp pack`, except more efficient. 
Instead of unpacking and packing all partition images, the raw data is directly copied from the old LP image to the new LP image. @@ -340,3 +340,47 @@ avbroot payload info -i ``` This subcommand shows all of the payload header fields (which will likely be extremely long). + +## `avbroot sparse` + +This set of commands is for working with Android sparse images. All features of the file format are supported, including hole chunks and CRC32 checksums. + +### Unpacking a sparse image + +```bash +avbroot sparse unpack -o -o +``` + +This subcommand unpacks a sparse image to a raw image. If the sparse image contains CRC32 checksums, they will be validated during unpacking. If the sparse image contains holes, the output image will be created as a native sparse file. + +Certain fastboot factory images may have multiple sparse images, like `super_1.img`, `super_2.img`, etc., where they all touch a disjoint set of regions on the same partition. These can be unpacked by running this subcommand for each sparse image and specifying the `--preserve` option along with using the same output file. This preserves the existing data in the output file when unpacking each sparse image. + +### Packing a sparse image + +```bash +avbroot sparse pack -i -o +``` + +This subcommand packs a new sparse image from a raw image. The default block size is 4096 bytes, which can be changed with the `--block-size` option. + +By default, this will pack the entire input file. However, on Linux, there is an optimization where all holes in the input file, if it is a native sparse file, will be stored as hole chunks instead of `0`-filled chunks in the output sparse image. + +To pack a partial sparse image, such as those used in the special fastboot factory images mentioned above, pass in `--region `. This option can be specified multiple times to pack multiple regions. 
+ +Unlike AOSP's `img2simg` tool, which never writes CRC32 checksums, this subcommand will write checksums if the input file has no holes and the entire file is being packed. + +### Repacking a sparse image + +```bash +avbroot sparse repack -i -o +``` + +This subcommand is logically equivalent to `avbroot sparse unpack` followed by `avbroot sparse pack`, except more efficient. This is useful for roundtrip testing of avbroot's sparse file parser. + +### Showing sparse image metadata + +```bash +avbroot sparse info -i +``` + +This subcommand shows the sparse image metadata, including the header and all chunks. diff --git a/avbroot/Cargo.toml b/avbroot/Cargo.toml index 0300007..e006034 100644 --- a/avbroot/Cargo.toml +++ b/avbroot/Cargo.toml @@ -20,7 +20,9 @@ clap = { version = "4.4.1", features = ["derive"] } clap_complete = "4.4.0" cms = { version = "0.2.2", features = ["std"] } const-oid = "0.9.5" +crc32fast = "1.4.2" ctrlc = "3.4.0" +dlv-list = "0.5.2" flate2 = "1.0.27" gf256 = { version = "0.3.0", features = ["rs"] } hex = { version = "0.4.3", features = ["serde"] } diff --git a/avbroot/src/cli/args.rs b/avbroot/src/cli/args.rs index d991194..0532fca 100644 --- a/avbroot/src/cli/args.rs +++ b/avbroot/src/cli/args.rs @@ -15,7 +15,7 @@ use clap::{Parser, Subcommand, ValueEnum}; use tracing::{debug, Level}; use tracing_subscriber::fmt::{format::Writer, time::FormatTime}; -use crate::cli::{avb, boot, completion, cpio, fec, hashtree, key, lp, ota, payload}; +use crate::cli::{avb, boot, completion, cpio, fec, hashtree, key, lp, ota, payload, sparse}; #[allow(clippy::large_enum_variant)] #[derive(Debug, Subcommand)] @@ -30,6 +30,7 @@ pub enum Command { Lp(lp::LpCli), Ota(ota::OtaCli), Payload(payload::PayloadCli), + Sparse(sparse::SparseCli), /// (Deprecated: Use `avbroot ota patch` instead.) Patch(ota::PatchCli), /// (Deprecated: Use `avbroot ota extract` instead.) 
@@ -134,6 +135,7 @@ pub fn main(logging_initialized: &AtomicBool, cancel_signal: &AtomicBool) -> Res Command::Lp(c) => lp::lp_main(&c, cancel_signal), Command::Ota(c) => ota::ota_main(&c, cancel_signal), Command::Payload(c) => payload::payload_main(&c, cancel_signal), + Command::Sparse(c) => sparse::sparse_main(&c, cancel_signal), // Deprecated aliases. Command::Patch(c) => ota::patch_subcommand(&c, cancel_signal), Command::Extract(c) => ota::extract_subcommand(&c, cancel_signal), diff --git a/avbroot/src/cli/mod.rs b/avbroot/src/cli/mod.rs index 0b6371b..20e7576 100644 --- a/avbroot/src/cli/mod.rs +++ b/avbroot/src/cli/mod.rs @@ -14,3 +14,4 @@ pub mod key; pub mod lp; pub mod ota; pub mod payload; +pub mod sparse; diff --git a/avbroot/src/cli/sparse.rs b/avbroot/src/cli/sparse.rs new file mode 100644 index 0000000..643954f --- /dev/null +++ b/avbroot/src/cli/sparse.rs @@ -0,0 +1,632 @@ +/* + * SPDX-FileCopyrightText: 2024 Andrew Gunnerson + * SPDX-License-Identifier: GPL-3.0-only + */ + +use std::{ + fmt, + fs::{File, OpenOptions}, + io::{Read, Seek, SeekFrom, Write}, + ops::Range, + path::{Path, PathBuf}, + sync::atomic::AtomicBool, +}; + +use anyhow::{anyhow, bail, Context, Result}; +use clap::{Parser, Subcommand}; +use crc32fast::Hasher; +use zerocopy::{little_endian, AsBytes}; + +use crate::{ + format::{ + padding, + sparse::{ + self, Chunk, ChunkBounds, ChunkData, ChunkList, CrcMode, Header, SparseReader, + SparseWriter, + }, + }, + stream, +}; + +struct CompactView<'a, T>(&'a [T]); + +impl<'a, T: fmt::Debug> fmt::Debug for CompactView<'a, T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut list = f.debug_list(); + + for item in self.0 { + // No alternate mode for no inner newlines. 
+ list.entry(&format_args!("{item:?}")); + } + + list.finish() + } +} + +#[derive(Clone)] +struct Metadata { + header: Header, + chunks: Vec, +} + +impl fmt::Debug for Metadata { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Metadata") + .field("header", &self.header) + .field("chunks", &CompactView(&self.chunks)) + .finish() + } +} + +fn open_reader(path: &Path) -> Result { + File::open(path).with_context(|| format!("Failed to open for reading: {path:?}")) +} + +fn open_writer(path: &Path, truncate: bool) -> Result { + OpenOptions::new() + .write(true) + .create(true) + .truncate(truncate) + .open(path) + .with_context(|| format!("Failed to open for writing: {path:?}")) +} + +fn display_metadata(cli: &SparseCli, metadata: &Metadata) { + if !cli.quiet { + println!("{metadata:#?}"); + } +} + +/// Splits large data chunks to ensure that none exceed 64 MiB. This is not +/// necessary in most cases, but is kept to match the behavior of AOSP's +/// libsparse. +fn split_chunks(chunks: &[Chunk], block_size: u32) -> Vec { + const MAX_BYTES: u32 = 64 * 1024 * 1024; + + let max_blocks_per_chunk = MAX_BYTES / block_size; + let mut result = vec![]; + + for mut chunk in chunks.iter().copied() { + if chunk.data == ChunkData::Data { + while chunk.bounds.len() > max_blocks_per_chunk { + result.push(Chunk { + bounds: ChunkBounds { + start: chunk.bounds.start, + end: chunk.bounds.start + max_blocks_per_chunk, + }, + data: chunk.data, + }); + + chunk.bounds.start += max_blocks_per_chunk; + } + } + + result.push(chunk); + } + + result +} + +/// [Linux only] Find allocated regions of the file. This avoids needing to read +/// unused portions of the file if it is a native sparse file. 
+#[cfg(any(target_os = "linux", target_os = "android"))] +fn find_allocated_regions( + path: &Path, + reader: &mut File, + cancel_signal: &AtomicBool, +) -> Result>> { + use rustix::{fs::SeekFrom, io::Errno}; + + let mut result = vec![]; + let mut start; + let mut end = 0; + + loop { + stream::check_cancel(cancel_signal)?; + + start = match rustix::fs::seek(&*reader, SeekFrom::Data(end as i64)) { + Ok(offset) => offset, + Err(e) if e == Errno::NXIO => break, + Err(e) => return Err(e).with_context(|| format!("Failed to seek to data: {path:?}")), + }; + + end = rustix::fs::seek(&*reader, SeekFrom::Hole(start as i64)) + .with_context(|| format!("Failed to seek to hole: {path:?}"))?; + + result.push(start..end); + } + + Ok(result) +} + +/// Compute chunk boundaries for the list of potentially overlapping file byte +/// regions. If `exact_bounds` is true, then the regions must be block-aligned. +/// Otherwise, the lower boundaries are aligned down and the upper boundaries +/// are aligned up. 
+fn get_chunks_for_regions( + block_size: u32, + file_size: u64, + file_regions: &[Range], + exact_bounds: bool, +) -> Result<(u32, Vec)> { + let block_size_64 = u64::from(block_size); + + let file_blocks: u32 = (file_size / u64::from(block_size)) + .try_into() + .map_err(|_| anyhow!("File size {file_size} too large for block size {block_size}"))?; + + let mut chunk_list = ChunkList::new(); + chunk_list.set_len(file_blocks); + + for region in file_regions { + let mut start_byte = region.start; + let mut end_byte = region.end; + + if exact_bounds { + if start_byte % block_size_64 != 0 || end_byte % block_size_64 != 0 { + bail!("File region bounds are not block-aligned: {region:?}"); + } + } else { + start_byte = start_byte / block_size_64 * block_size_64; + end_byte = padding::round(end_byte, block_size_64).unwrap(); + } + + let start_block: u32 = (start_byte / block_size_64).try_into().map_err(|_| { + anyhow!("Region start offset {start_byte} too large for block size {block_size}") + })?; + let end_block: u32 = (end_byte / block_size_64).try_into().map_err(|_| { + anyhow!("Region end offset {end_byte} too large for block size {block_size}") + })?; + + chunk_list.insert_data(ChunkBounds { + start: start_block, + end: end_block, + }); + } + + let chunks = chunk_list.iter_allocated().map(|c| c.bounds).collect(); + + Ok((file_blocks, chunks)) +} + +/// Compute the sparse [`Chunk`]s needed to cover the specified regions. +fn compute_chunks( + path: &Path, + reader: &mut File, + block_size: u32, + file_blocks: u32, + block_regions: &[ChunkBounds], + cancel_signal: &AtomicBool, +) -> Result<(ChunkList, u32)> { + let mut chunk_list = ChunkList::new(); + let mut hasher = Some(Hasher::new()); + let mut buf = vec![0u8; block_size as usize]; + let mut block = 0; + + chunk_list.set_len(file_blocks); + + for bounds in block_regions { + if bounds.start != block { + // Not contiguous so we cannot compute the checksum. 
+ hasher = None; + } + + let offset = u64::from(bounds.start) * u64::from(block_size); + + reader + .seek(SeekFrom::Start(offset)) + .with_context(|| format!("Failed to seek file: {path:?}"))?; + + for block in *bounds { + stream::check_cancel(cancel_signal)?; + + reader + .read_exact(&mut buf) + .with_context(|| format!("Failed to read full block: {path:?}"))?; + + if let Some(h) = &mut hasher { + h.update(&buf); + } + + let new_bounds = ChunkBounds { + start: block, + end: block + 1, + }; + + if buf.chunks_exact(4).all(|c| c == &buf[..4]) { + let fill_value = u32::from_le_bytes(buf[..4].try_into().unwrap()); + chunk_list.insert_fill(new_bounds, fill_value); + } else { + chunk_list.insert_data(new_bounds); + } + } + + block = bounds.end; + } + + if block != file_blocks { + hasher = None; + } + + let crc32 = hasher.map(|h| h.finalize()).unwrap_or_default(); + + Ok((chunk_list, crc32)) +} + +fn unpack_subcommand( + sparse_cli: &SparseCli, + cli: &UnpackCli, + cancel_signal: &AtomicBool, +) -> Result<()> { + let reader = open_reader(&cli.input)?; + let mut sparse_reader = SparseReader::new(reader, CrcMode::Validate) + .with_context(|| format!("Failed to read sparse file: {:?}", cli.input))?; + + let mut metadata = Metadata { + header: sparse_reader.header(), + chunks: vec![], + }; + + let mut writer = open_writer(&cli.output, !cli.preserve)?; + + if cli.preserve { + let expected_size = + u64::from(metadata.header.num_blocks) * u64::from(metadata.header.block_size); + let file_size = writer + .seek(SeekFrom::End(0)) + .with_context(|| format!("Failed to get file size: {:?}", cli.output))?; + + if file_size < expected_size { + writer + .set_len(expected_size) + .with_context(|| format!("Failed to set file size: {:?}", cli.output))?; + } + + writer + .seek(SeekFrom::Start(0)) + .with_context(|| format!("Failed to seek file: {:?}", cli.output))?; + } + + while let Some(chunk) = sparse_reader + .next_chunk() + .with_context(|| format!("Failed to read chunk: {:?}", 
cli.input))? + { + match chunk.data { + ChunkData::Fill(value) => { + let fill_value = little_endian::U32::from(value); + let buf = vec![fill_value; metadata.header.block_size as usize / 4]; + + for _ in chunk.bounds { + stream::check_cancel(cancel_signal)?; + + writer + .write_all(buf.as_bytes()) + .with_context(|| format!("Failed to write data: {:?}", cli.output))?; + } + } + ChunkData::Data => { + // This cannot overflow. + let to_copy = chunk.bounds.len() * metadata.header.block_size; + + stream::copy_n( + &mut sparse_reader, + &mut writer, + to_copy.into(), + cancel_signal, + ) + .with_context(|| { + format!("Failed to copy data: {:?} -> {:?}", cli.input, cli.output) + })?; + } + ChunkData::Hole => { + // This cannot overflow. + let to_skip = chunk.bounds.len() * metadata.header.block_size; + + writer + .seek(SeekFrom::Current(to_skip.into())) + .with_context(|| format!("Failed to seek file: {:?}", cli.output))?; + } + ChunkData::Crc32(_) => {} + } + + metadata.chunks.push(chunk); + } + + display_metadata(sparse_cli, &metadata); + + sparse_reader + .finish() + .with_context(|| format!("Failed to finalize reader: {:?}", cli.input))?; + + Ok(()) +} + +fn pack_subcommand( + sparse_cli: &SparseCli, + cli: &PackCli, + cancel_signal: &AtomicBool, +) -> Result<()> { + if cli.block_size == 0 || cli.block_size % 4 != 0 { + bail!( + "Block size must be a non-zero multiple of 4: {}", + cli.block_size, + ); + } + + let mut reader = open_reader(&cli.input)?; + + let file_size = reader + .seek(SeekFrom::End(0)) + .with_context(|| format!("Failed to get file size: {:?}", cli.input))?; + if file_size % u64::from(cli.block_size) != 0 { + bail!( + "File size {file_size} is not a multiple of block size {}", + cli.block_size, + ); + } + + // Compute the byte regions to pack into the sparse file. 
+ let (file_regions, exact_bounds) = if !cli.region.is_empty() { + let regions = cli + .region + .chunks_exact(2) + .map(|c| c[0]..c[1]) + .collect::>(); + + (regions, false) + } else { + #[cfg(any(target_os = "linux", target_os = "android"))] + { + let regions = find_allocated_regions(&cli.input, &mut reader, cancel_signal)?; + + (regions, false) + } + #[cfg(not(any(target_os = "linux", target_os = "android")))] + { + (vec![0..file_size], true) + } + }; + + // Get the file regions as non-overlapping and sorted block regions. + let (file_blocks, block_regions) = + get_chunks_for_regions(cli.block_size, file_size, &file_regions, exact_bounds)?; + + // Compute the checksum (if possible) and the list of actual chunks. + let (chunk_list, crc32) = compute_chunks( + &cli.input, + &mut reader, + cli.block_size, + file_blocks, + &block_regions, + cancel_signal, + )?; + + let chunks = split_chunks(&chunk_list.to_chunks(), cli.block_size); + let metadata = Metadata { + header: Header { + major_version: sparse::MAJOR_VERSION, + minor_version: sparse::MINOR_VERSION, + block_size: cli.block_size, + num_blocks: chunk_list.len(), + // This can't overflow because the number of chunks is always + // smaller than the number of blocks (because we don't add CRC32 + // chunks). + num_chunks: chunks.len() as u32, + // This will be zero if the regions don't span the entire file. 
+ crc32, + }, + chunks, + }; + + display_metadata(sparse_cli, &metadata); + + let writer = open_writer(&cli.output, true)?; + let mut sparse_writer = SparseWriter::new(writer, metadata.header) + .with_context(|| format!("Failed to initialize sparse file: {:?}", cli.output))?; + + for chunk in metadata.chunks { + sparse_writer + .start_chunk(chunk) + .with_context(|| format!("Failed to start chunk: {:?}", cli.output))?; + + if chunk.data == ChunkData::Data { + let offset = u64::from(chunk.bounds.start) * u64::from(cli.block_size); + + reader + .seek(SeekFrom::Start(offset)) + .with_context(|| format!("Failed to seek file: {:?}", cli.input))?; + + let to_copy = u64::from(chunk.bounds.len()) * u64::from(cli.block_size); + + stream::copy_n(&mut reader, &mut sparse_writer, to_copy, cancel_signal).with_context( + || format!("Failed to copy data: {:?} -> {:?}", cli.input, cli.output), + )?; + } + } + + sparse_writer + .finish() + .with_context(|| format!("Failed to finalize writer: {:?}", cli.output))?; + + Ok(()) +} + +fn repack_subcommand( + sparse_cli: &SparseCli, + cli: &RepackCli, + cancel_signal: &AtomicBool, +) -> Result<()> { + let reader = open_reader(&cli.input)?; + let mut sparse_reader = SparseReader::new_seekable(reader, CrcMode::Validate) + .with_context(|| format!("Failed to read sparse file: {:?}", cli.input))?; + + let mut metadata = Metadata { + header: sparse_reader.header(), + chunks: vec![], + }; + + let writer = open_writer(&cli.output, true)?; + let mut sparse_writer = SparseWriter::new(writer, metadata.header) + .with_context(|| format!("Failed to initialize sparse file: {:?}", cli.output))?; + + while let Some(chunk) = sparse_reader + .next_chunk() + .with_context(|| format!("Failed to read chunk: {:?}", cli.input))? + { + sparse_writer + .start_chunk(chunk) + .with_context(|| format!("Failed to start chunk: {:?}", cli.output))?; + + if chunk.data == ChunkData::Data { + // This cannot overflow. 
+ let to_copy = chunk.bounds.len() * metadata.header.block_size; + + stream::copy_n( + &mut sparse_reader, + &mut sparse_writer, + to_copy.into(), + cancel_signal, + ) + .with_context(|| format!("Failed to copy data: {:?} -> {:?}", cli.input, cli.output))?; + } + + metadata.chunks.push(chunk); + } + + display_metadata(sparse_cli, &metadata); + + sparse_reader + .finish() + .with_context(|| format!("Failed to finalize reader: {:?}", cli.input))?; + sparse_writer + .finish() + .with_context(|| format!("Failed to finalize writer: {:?}", cli.output))?; + + Ok(()) +} + +fn info_subcommand(sparse_cli: &SparseCli, cli: &InfoCli) -> Result<()> { + let reader = open_reader(&cli.input)?; + let mut sparse_reader = SparseReader::new_seekable(reader, CrcMode::Ignore) + .with_context(|| format!("Failed to read sparse file: {:?}", cli.input))?; + + let mut metadata = Metadata { + header: sparse_reader.header(), + chunks: vec![], + }; + + while let Some(chunk) = sparse_reader + .next_chunk() + .with_context(|| format!("Failed to read chunk: {:?}", cli.input))? + { + metadata.chunks.push(chunk); + } + + display_metadata(sparse_cli, &metadata); + + Ok(()) +} + +pub fn sparse_main(cli: &SparseCli, cancel_signal: &AtomicBool) -> Result<()> { + match &cli.command { + SparseCommand::Unpack(c) => unpack_subcommand(cli, c, cancel_signal), + SparseCommand::Pack(c) => pack_subcommand(cli, c, cancel_signal), + SparseCommand::Repack(c) => repack_subcommand(cli, c, cancel_signal), + SparseCommand::Info(c) => info_subcommand(cli, c), + } +} + +/// Unpack a sparse image. +#[derive(Debug, Parser)] +struct UnpackCli { + /// Path to input sparse image. + #[arg(short, long, value_name = "FILE", value_parser)] + input: PathBuf, + + /// Path to output raw image. + #[arg(short, long, value_name = "FILE", value_parser)] + output: PathBuf, + + /// Preserve existing data in the output file. 
+ /// + /// This is useful when unpacking multiple sparse files into a single output + /// file because they contain disjoint blocks of data. + #[arg(long)] + preserve: bool, +} + +/// Pack a sparse image. +#[derive(Debug, Parser)] +struct PackCli { + /// Path to output sparse image. + /// + /// If `--region` is not used and the input file is not a (native) sparse + /// file on Linux, then the output sparse image is written with a CRC32 + /// checksum in the header. + #[arg(short, long, value_name = "FILE", value_parser)] + output: PathBuf, + + /// Path to input raw image. + /// + /// On Linux, if this is a (native) sparse file, then the unallocated + /// sections of the file will be skipped and will be stored in the output + /// file as hole chunks. + #[arg(short, long, value_name = "FILE", value_parser)] + input: PathBuf, + + /// Block size. + #[arg(short, long, value_name = "BYTES", default_value_t = 4096)] + block_size: u32, + + /// Pack certain byte regions from the file. + /// + /// The start offset will be aligned down to the block size and the end + /// offset will be aligned up. This option can be specified any number of + /// times and in any order. Overlapping regions are allowed. + /// + /// Unused regions will be stored in the sparse file as hole chunks. + #[arg(short, long, value_names = ["START", "END"], num_args = 2)] + region: Vec, +} + +/// Repack a sparse image. +/// +/// This command is equivalent to running `unpack` and `pack`, except without +/// storing the unpacked data to disk. +#[derive(Debug, Parser)] +struct RepackCli { + /// Path to input sparse image. + #[arg(short, long, value_name = "FILE", value_parser)] + input: PathBuf, + + /// Path to output sparse image. + #[arg(short, long, value_name = "FILE", value_parser)] + output: PathBuf, +} + +/// Display sparse image metadata. +#[derive(Debug, Parser)] +struct InfoCli { + /// Path to input sparse image. 
+ #[arg(short, long, value_name = "FILE", value_parser)] + input: PathBuf, +} + +#[derive(Debug, Subcommand)] +enum SparseCommand { + Unpack(UnpackCli), + Pack(PackCli), + Repack(RepackCli), + Info(InfoCli), +} + +/// Pack, unpack, and inspect sparse images. +#[derive(Debug, Parser)] +pub struct SparseCli { + #[command(subcommand)] + command: SparseCommand, + + /// Don't print sparse image metadata. + #[arg(short, long, global = true)] + quiet: bool, +} diff --git a/avbroot/src/format/lp.rs b/avbroot/src/format/lp.rs index 47406d4..8cec2c6 100644 --- a/avbroot/src/format/lp.rs +++ b/avbroot/src/format/lp.rs @@ -166,7 +166,7 @@ const _: () = assert!(mem::size_of::() < GEOMETRY_SIZE as usize); impl fmt::Debug for RawGeometry { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("RawGeometry") - .field("magic", &format_args!("{:#08x}", self.magic.get())) + .field("magic", &format_args!("{:#010x}", self.magic.get())) .field("struct_size", &self.struct_size.get()) .field("checksum", &hex::encode(self.checksum)) .field("metadata_max_size", &self.metadata_max_size.get()) @@ -182,7 +182,7 @@ impl RawGeometry { fn validate(&self) -> Result<()> { if self.magic.get() != GEOMETRY_MAGIC { return Err(Error::Geometry(format!( - "Invalid magic: {:#08x}", + "Invalid magic: {:#010x}", self.magic.get(), ))); } @@ -332,7 +332,7 @@ struct RawHeader { impl fmt::Debug for RawHeader { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("RawHeader") - .field("magic", &format_args!("{:#08x}", self.magic.get())) + .field("magic", &format_args!("{:#010x}", self.magic.get())) .field("major_version", &self.major_version.get()) .field("minor_version", &self.minor_version.get()) .field("header_size", &self.header_size.get()) @@ -392,7 +392,7 @@ impl RawHeader { fn validate(&self, geometry: &RawGeometry) -> Result<()> { if self.magic.get() != HEADER_MAGIC { return Err(Error::Header(format!( - "Invalid magic: {:#08x}", + "Invalid magic: {:#010x}", 
self.magic.get(), ))); } diff --git a/avbroot/src/format/mod.rs b/avbroot/src/format/mod.rs index af6ffaa..4ceea9a 100644 --- a/avbroot/src/format/mod.rs +++ b/avbroot/src/format/mod.rs @@ -13,4 +13,5 @@ pub mod lp; pub mod ota; pub mod padding; pub mod payload; +pub mod sparse; pub mod verityrs; diff --git a/avbroot/src/format/sparse.rs b/avbroot/src/format/sparse.rs new file mode 100644 index 0000000..756cfcb --- /dev/null +++ b/avbroot/src/format/sparse.rs @@ -0,0 +1,1211 @@ +/* + * SPDX-FileCopyrightText: 2024 Andrew Gunnerson + * SPDX-License-Identifier: GPL-3.0-only + */ + +use std::{ + fmt, + io::{self, Read, Seek, SeekFrom, Write}, + mem, + ops::Range, +}; + +use crc32fast::Hasher; +use dlv_list::{Index, VecList}; +use thiserror::Error; +use zerocopy::{byteorder::little_endian, AsBytes, FromBytes, FromZeroes, Unaligned}; + +/// Magic value for [`RawHeader::magic`]. +const HEADER_MAGIC: u32 = 0xed26ff3a; + +/// Raw chunk type for [`RawChunk::chunk_type`]. +const CHUNK_TYPE_RAW: u16 = 0xcac1; +/// Fill chunk type for [`RawChunk::chunk_type`]. +const CHUNK_TYPE_FILL: u16 = 0xcac2; +/// Hole chunk type for [`RawChunk::chunk_type`]. +const CHUNK_TYPE_DONT_CARE: u16 = 0xcac3; +/// CRC32 chunk type for [`RawChunk::chunk_type`]. +const CHUNK_TYPE_CRC32: u16 = 0xcac4; + +/// Supported major version. +pub const MAJOR_VERSION: u16 = 1; +/// Supported minor version. +pub const MINOR_VERSION: u16 = 0; + +#[derive(Debug, Error)] +pub enum Error { + #[error("Sparse header: {0}")] + Header(String), + #[error("Sparse chunk #{0}: {1}")] + Chunk(u32, String), + #[error("Sparse reader: {0}")] + Reader(String), + #[error("Sparse writer: {0}")] + Writer(String), + #[error("I/O error")] + Io(#[from] io::Error), +} + +type Result = std::result::Result; + +/// Raw on-disk layout for the header. +#[derive(Clone, Copy, FromZeroes, FromBytes, AsBytes, Unaligned)] +#[repr(packed)] +struct RawHeader { + /// Magic value. This should be equal to [`HEADER_MAGIC`]. 
+ magic: little_endian::U32, + /// Major version. [`MAJOR_VERSION`] is the only version supported. All + /// other versions cannot be parsed. + major_version: little_endian::U16, + /// Minor version. Versions aside from [`MINOR_VERSION`] can be read, but + /// not written. + minor_version: little_endian::U16, + /// Size of this [`RawHeader`]. + file_hdr_sz: little_endian::U16, + /// Size of a [`RawChunk`]. + chunk_hdr_sz: little_endian::U16, + /// Block size in bytes. Must be a multiple of 4. + blk_sz: little_endian::U32, + /// Number of blocks when unsparsed. + total_blks: little_endian::U32, + /// Number of chunks. + total_chunks: little_endian::U32, + /// CRC32 checksum of the original data. + image_checksum: little_endian::U32, +} + +impl fmt::Debug for RawHeader { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("RawHeader") + .field("magic", &format_args!("{:#010x}", self.magic)) + .field("major_version", &self.major_version.get()) + .field("minor_version", &self.minor_version.get()) + .field("file_hdr_sz", &self.file_hdr_sz.get()) + .field("chunk_hdr_sz", &self.chunk_hdr_sz.get()) + .field("blk_sz", &self.blk_sz.get()) + .field("total_blks", &self.total_blks.get()) + .field("total_chunks", &self.total_chunks.get()) + .field( + "image_checksum", + &format_args!("{:#010x}", self.image_checksum.get()), + ) + .finish() + } +} + +impl RawHeader { + fn validate(&self) -> Result<()> { + if self.magic.get() != HEADER_MAGIC { + return Err(Error::Header(format!( + "Invalid magic: {:#010x}", + self.magic.get(), + ))); + } + + if self.major_version.get() != MAJOR_VERSION { + return Err(Error::Header(format!( + "Unsupported major version: {}", + self.major_version.get(), + ))); + } + + if self.file_hdr_sz.get() != mem::size_of::() as u16 { + return Err(Error::Header(format!( + "Invalid file header size: {}", + self.file_hdr_sz.get(), + ))); + } else if self.chunk_hdr_sz.get() != mem::size_of::() as u16 { + return Err(Error::Header(format!( + 
"Invalid chunk header size: {}", + self.chunk_hdr_sz.get(), + ))); + } + + if self.blk_sz.get() == 0 || self.blk_sz.get() % 4 != 0 { + return Err(Error::Header(format!( + "Invalid block size: {}", + self.blk_sz.get(), + ))); + } + + Ok(()) + } +} + +/// Raw on-disk layout for the chunk header. +#[derive(Clone, Copy, FromZeroes, FromBytes, AsBytes, Unaligned)] +#[repr(packed)] +struct RawChunk { + /// Chunk type. Must be [`CHUNK_TYPE_RAW`], [`CHUNK_TYPE_FILL`], + /// [`CHUNK_TYPE_DONT_CARE`], or [`CHUNK_TYPE_CRC32`]. + chunk_type: little_endian::U16, + /// Unused. + reserved1: little_endian::U16, + /// Number of unsparsed blocks this chunk represents. + chunk_sz: little_endian::U32, + /// The size in bytes of this chunk, including this [`RawChunk`]. + total_sz: little_endian::U32, +} + +impl fmt::Debug for RawChunk { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("RawChunk") + .field("chunk_type", &self.chunk_type.get()) + .field("reserved1", &format_args!("{:#010x}", self.reserved1.get())) + .field("chunk_sz", &self.chunk_sz.get()) + .field("total_sz", &self.total_sz.get()) + .finish() + } +} + +impl RawChunk { + fn expected_size(&self, index: u32, header: &RawHeader) -> Result { + let data_size = match self.chunk_type.get() { + CHUNK_TYPE_RAW => self + .chunk_sz + .get() + .checked_mul(header.blk_sz.get()) + .ok_or_else(|| { + Error::Chunk( + index, + format!( + "Chunk size overflow: {} * {}", + self.chunk_sz.get(), + header.blk_sz.get(), + ), + ) + })?, + CHUNK_TYPE_FILL | CHUNK_TYPE_CRC32 => 4, + CHUNK_TYPE_DONT_CARE => 0, + t => return Err(Error::Chunk(index, format!("Invalid chunk type: {t}"))), + }; + + data_size + .checked_add(mem::size_of::() as u32) + .ok_or_else(|| Error::Chunk(index, format!("Data size too large: {data_size}"))) + } + + fn validate(&self, index: u32, header: &RawHeader, start_block: u32) -> Result<()> { + let end_block = start_block + .checked_add(self.chunk_sz.get()) + .ok_or_else(|| { + Error::Chunk( + 
index, + format!( + "Block count overflow: {start_block} + {}", + self.chunk_sz.get(), + ), + ) + })?; + + if end_block > header.total_blks.get() { + return Err(Error::Chunk( + index, + format!( + "End block {end_block} exceeds total blocks {}", + header.total_blks.get(), + ), + ))?; + } + + if self.chunk_type.get() == CHUNK_TYPE_CRC32 && self.chunk_sz.get() != 0 { + return Err(Error::Chunk( + index, + format!( + "CRC32 chunk has non-zero blocks: {:?}", + start_block..end_block, + ), + )); + } + + let expected_size = self.expected_size(index, header)?; + + if expected_size != self.total_sz.get() { + return Err(Error::Chunk( + index, + format!( + "Expected total size {expected_size}, but have {}", + self.total_sz.get(), + ), + )); + } + + Ok(()) + } +} + +/// Sparse file header. +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct Header { + /// Major version. [`MAJOR_VERSION`] is the only version supported. All + /// other versions cannot be parsed. + pub major_version: u16, + /// Minor version. Versions aside from [`MINOR_VERSION`] can be read, but + /// not written. + pub minor_version: u16, + /// Block size in bytes. Must be a multiple of 4. + pub block_size: u32, + /// Number of blocks when unsparsed. + pub num_blocks: u32, + /// Number of chunks. + pub num_chunks: u32, + /// CRC32 checksum of the original data. + pub crc32: u32, +} + +impl fmt::Debug for Header { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Header") + .field("major_version", &self.major_version) + .field("minor_version", &self.minor_version) + .field("block_size", &self.block_size) + .field("num_blocks", &self.num_blocks) + .field("num_chunks", &self.num_chunks) + .field("crc32", &format_args!("{:#010x}", self.crc32)) + .finish() + } +} + +/// Half-open range indicating the block range that a chunk covers. +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct ChunkBounds { + /// Starting block (inclusive). + pub start: u32, + /// Ending block (exclusive). 
+    pub end: u32,
+}
+
+impl fmt::Debug for ChunkBounds {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}..{}", self.start, self.end)
+    }
+}
+
+impl IntoIterator for ChunkBounds {
+    type Item = u32;
+
+    type IntoIter = Range<u32>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.start..self.end
+    }
+}
+
+impl ChunkBounds {
+    /// Length in blocks.
+    #[allow(clippy::len_without_is_empty)]
+    pub fn len(&self) -> u32 {
+        self.end - self.start
+    }
+}
+
+/// The type of data contained in a chunk.
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub enum ChunkData {
+    /// The chunk is filled with raw data.
+    Data,
+    /// The chunk is filled with repeating patterns of the specified integer
+    /// encoded in little-endian.
+    Fill(u32),
+    /// The chunk is a hole and does not represent useful or valid data.
+    Hole,
+    /// The chunk is a CRC32 checksum. This does not represent actual data but
+    /// serves as a checkpoint for validating the current checksum while in the
+    /// middle of the sparse file.
+    Crc32(u32),
+}
+
+impl fmt::Debug for ChunkData {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::Data => write!(f, "Data"),
+            Self::Fill(value) => f
+                .debug_tuple("Fill")
+                .field(&format_args!("{value:#010x}"))
+                .finish(),
+            Self::Hole => write!(f, "Hole"),
+            Self::Crc32(checksum) => f
+                .debug_tuple("Crc32")
+                .field(&format_args!("{checksum:#010x}"))
+                .finish(),
+        }
+    }
+}
+
+/// A type that represents a contiguous list of blocks and the type of data or
+/// metadata they contain.
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub struct Chunk {
+    pub bounds: ChunkBounds,
+    pub data: ChunkData,
+}
+
+impl fmt::Debug for Chunk {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Chunk")
+            .field("bounds", &self.bounds)
+            .field("data", &format_args!("{:?}", self.data))
+            .finish()
+    }
+}
+
+/// A type for computing the minimal number of chunks for storing some given
+/// data.
+/// Adding chunks sequentially is most efficient, though chunks can be
+/// added in any order. Adding a new chunk that overlaps an existing chunk will
+/// remove, truncate, or split the existing chunk accordingly.
+#[derive(Clone, Debug, Default)]
+pub struct ChunkList {
+    chunks: VecList<Chunk>,
+    last_used: Option<Index<Chunk>>,
+    size: u32,
+}
+
+impl ChunkList {
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Split the previous chunk if its bounds contain the specified chunk.
+    fn split_prev(&mut self, index: Index<Chunk>) {
+        let Some(prev_index) = self.chunks.get_previous_index(index) else {
+            return;
+        };
+
+        let cur = *self.chunks.get(index).unwrap();
+        let prev = self.chunks.get_mut(prev_index).unwrap();
+
+        debug_assert!(prev.bounds.start <= cur.bounds.start);
+
+        if prev.bounds.end > cur.bounds.end {
+            let new_chunk = Chunk {
+                bounds: ChunkBounds {
+                    start: cur.bounds.end,
+                    end: prev.bounds.end,
+                },
+                data: prev.data,
+            };
+
+            prev.bounds.end = cur.bounds.start;
+            self.chunks.insert_after(index, new_chunk);
+        }
+    }
+
+    /// Merge the chunk at the specified index downwards until there are no more
+    /// mergeable chunks. Returns the index of the new chunk that contains the
+    /// original chunk.
+    fn merge_down(&mut self, mut index: Index<Chunk>) -> Index<Chunk> {
+        while let Some(prev_index) = self.chunks.get_previous_index(index) {
+            let cur = *self.chunks.get(index).unwrap();
+            let prev = self.chunks.get_mut(prev_index).unwrap();
+
+            debug_assert!(prev.bounds.start <= cur.bounds.start);
+
+            if prev.bounds.end < cur.bounds.start {
+                // There's a gap.
+                break;
+            } else if cur.bounds.start <= prev.bounds.start {
+                // Current chunk completely overlaps the previous chunk, so
+                // remove the previous chunk.
+                self.chunks.remove(prev_index);
+                continue;
+            } else if cur.bounds.start < prev.bounds.end {
+                // Current chunk partially overlaps the previous chunk, so
+                // truncate the previous chunk.
+                prev.bounds.end = cur.bounds.start;
+            }
+
+            // If the data is compatible, then merge the chunks.
+            if cur.data == prev.data {
+                prev.bounds.end = cur.bounds.end;
+                self.chunks.remove(index);
+                index = prev_index;
+            }
+
+            break;
+        }
+
+        index
+    }
+
+    /// Merge the chunk at the specified index upwards until there are no more
+    /// mergeable chunks. Returns the index of the new chunk that contains the
+    /// original chunk.
+    fn merge_up(&mut self, mut index: Index<Chunk>) -> Index<Chunk> {
+        while let Some(next_index) = self.chunks.get_next_index(index) {
+            let cur = *self.chunks.get(index).unwrap();
+            let next = self.chunks.get_mut(next_index).unwrap();
+
+            debug_assert!(cur.bounds.start <= next.bounds.start);
+
+            if cur.bounds.end < next.bounds.start {
+                // There's a gap.
+                break;
+            } else if cur.bounds.end >= next.bounds.end {
+                // Current chunk completely overlaps the next chunk, so remove
+                // the next chunk.
+                self.chunks.remove(next_index);
+                continue;
+            } else if cur.bounds.end > next.bounds.start {
+                // Current chunk partially overlaps the next chunk, so truncate
+                // the next chunk.
+                next.bounds.start = cur.bounds.end;
+            }
+
+            // If the data is compatible, then merge the chunks.
+            if cur.data == next.data {
+                next.bounds.start = cur.bounds.start;
+                self.chunks.remove(index);
+                index = next_index;
+            }
+
+            break;
+        }
+
+        index
+    }
+
+    /// Add the specified chunk into the list, removing, truncating, splitting,
+    /// or merging chunks as needed. Returns the index of the chunk that
+    /// contains the original chunk.
+    fn add_chunk(&mut self, chunk: Chunk) -> Index<Chunk> {
+        // Trivial case: adding the first chunk.
+        if self.chunks.is_empty() {
+            let index = self.chunks.push_back(chunk);
+            self.last_used = Some(index);
+            self.size = self.size.max(chunk.bounds.end);
+            return index;
+        }
+
+        // Find the chunk to insert before. We save the last used index to
+        // optimize for sequential insertion and avoid needing to search the
+        // entire list every time.
+        let mut insert_before = self.chunks.front_index();
+
+        if let Some(last_used) = self.last_used {
+            if chunk.bounds.start >= self.chunks.get(last_used).unwrap().bounds.start {
+                // The new chunk starts after the last used chunk.
+                insert_before = Some(last_used);
+            }
+        }
+
+        while let Some(index) = insert_before {
+            if self.chunks.get(index).unwrap().bounds.start >= chunk.bounds.start {
+                break;
+            }
+
+            insert_before = self.chunks.get_next_index(index);
+        }
+
+        let mut chunk_index = if let Some(index) = insert_before {
+            self.chunks.insert_before(index, chunk)
+        } else {
+            self.chunks.push_back(chunk)
+        };
+
+        // Split the previous chunk if it fully contains the new chunk.
+        self.split_prev(chunk_index);
+
+        // Merge with adjacent chunks if compatible.
+        chunk_index = self.merge_up(chunk_index);
+        chunk_index = self.merge_down(chunk_index);
+
+        self.last_used = Some(chunk_index);
+        self.size = self.size.max(chunk.bounds.end);
+
+        chunk_index
+    }
+
+    /// Insert actual data at the specified region.
+    pub fn insert_data(&mut self, bounds: ChunkBounds) {
+        self.add_chunk(Chunk {
+            bounds,
+            data: ChunkData::Data,
+        });
+    }
+
+    /// Insert a fill chunk at the specified region. The fill value is encoded
+    /// in little-endian.
+    pub fn insert_fill(&mut self, bounds: ChunkBounds, fill_value: u32) {
+        self.add_chunk(Chunk {
+            bounds,
+            data: ChunkData::Fill(fill_value),
+        });
+    }
+
+    /// Punch a hole at the specified region. If a hole is punched at the end
+    /// of the file, the file size does not decrease.
+    pub fn insert_hole(&mut self, bounds: ChunkBounds) {
+        let index = self.add_chunk(Chunk {
+            bounds,
+            data: ChunkData::Hole,
+        });
+
+        // Special case: we don't actually store holes.
+        self.last_used = self.chunks.get_previous_index(index);
+        self.chunks.remove(index);
+    }
+
+    /// Get the file size in blocks.
+    #[allow(clippy::len_without_is_empty)]
+    pub fn len(&self) -> u32 {
+        self.size
+    }
+
+    /// Set the file size in blocks.
+    /// This automatically increases when adding a
+    /// new chunk beyond this bound.
+    pub fn set_len(&mut self, size: u32) {
+        if size < self.size {
+            self.insert_hole(ChunkBounds {
+                start: size,
+                end: self.size,
+            });
+        }
+
+        self.size = size;
+    }
+
+    /// Get the list of chunks, including all holes.
+    pub fn to_chunks(&self) -> Vec<Chunk> {
+        let mut result = Vec::with_capacity(self.chunks.len());
+        let mut block = 0;
+
+        for chunk in &self.chunks {
+            if chunk.bounds.start != block {
+                result.push(Chunk {
+                    bounds: ChunkBounds {
+                        start: block,
+                        end: chunk.bounds.start,
+                    },
+                    data: ChunkData::Hole,
+                });
+            }
+
+            result.push(*chunk);
+
+            block = chunk.bounds.end;
+        }
+
+        if block != self.size {
+            result.push(Chunk {
+                bounds: ChunkBounds {
+                    start: block,
+                    end: self.size,
+                },
+                data: ChunkData::Hole,
+            });
+        }
+
+        result
+    }
+
+    /// Iterate through allocated chunks, which excludes holes.
+    pub fn iter_allocated(&self) -> impl Iterator<Item = Chunk> + '_ {
+        self.chunks.iter().copied()
+    }
+}
+
+/// Whether to validate CRC32 checksums.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum CrcMode {
+    Validate,
+    Ignore,
+}
+
+/// Hash what a fill chunk's contents would be if it were unsparsed.
+fn hash_fill_chunk(
+    raw_chunk: &RawChunk,
+    fill_value: little_endian::U32,
+    raw_header: &RawHeader,
+    hasher: &mut Hasher,
+) {
+    let buf = [fill_value; 1024];
+    let mut remain = u64::from(raw_chunk.chunk_sz) * u64::from(raw_header.blk_sz);
+
+    while remain > 0 {
+        let n = remain.min(buf.as_bytes().len() as u64) as usize;
+        hasher.update(&buf.as_bytes()[..n]);
+        remain -= n as u64;
+    }
+}
+
+/// A type for reading sparse files.
+pub struct SparseReader<R: Read> {
+    inner: R,
+    seek: Option<fn(&mut R, SeekFrom) -> io::Result<u64>>,
+    header: RawHeader,
+    /// Starting block for next chunk.
+    block: u32,
+    /// Next chunk to read.
+    chunk: u32,
+    /// Number of bytes left to read for the current chunk if the chunk has
+    /// [`ChunkData::Data`].
+    data_remain: u32,
+    hasher: Option<Hasher>,
+}
+
+impl<R: Read + Seek> SparseReader<R> {
+    /// Create a new reader from a seekable file. This allows data chunks to be
+    /// efficiently skipped without reading them.
+    pub fn new_seekable(inner: R, crc_mode: CrcMode) -> Result<Self> {
+        let mut result = Self::new(inner, crc_mode)?;
+        result.seek = Some(Seek::seek);
+        Ok(result)
+    }
+}
+
+impl<R: Read> SparseReader<R> {
+    /// Create a new reader from a stream. This cannot efficiently skip reading
+    /// data chunks if they are not needed. If the underlying file is seekable
+    /// and skipping chunks is needed, use [`Self::new_seekable`] instead.
+    pub fn new(mut inner: R, crc_mode: CrcMode) -> Result<Self> {
+        let mut header = RawHeader::new_zeroed();
+        inner.read_exact(header.as_bytes_mut())?;
+
+        header.validate()?;
+
+        Ok(Self {
+            inner,
+            seek: None,
+            header,
+            block: 0,
+            chunk: 0,
+            data_remain: 0,
+            hasher: match crc_mode {
+                CrcMode::Validate => Some(Hasher::new()),
+                CrcMode::Ignore => None,
+            },
+        })
+    }
+
+    /// Get the sparse file header.
+    pub fn header(&self) -> Header {
+        Header {
+            major_version: self.header.major_version.get(),
+            minor_version: self.header.minor_version.get(),
+            block_size: self.header.blk_sz.get(),
+            num_blocks: self.header.total_blks.get(),
+            num_chunks: self.header.total_chunks.get(),
+            crc32: self.header.image_checksum.get(),
+        }
+    }
+
+    /// Read the header for the next chunk. If the previous chunk had
+    /// [`ChunkData::Data`], the data must be fully read first unless the
+    /// reader is seekable and CRC validation is disabled. If the last chunk has
+    /// already been read, then [`None`] is returned.
+    ///
+    /// For chunks with [`ChunkData::Crc32`], if CRC validation is enabled, the
+    /// checksum will have already been verified. The caller does not need to
+    /// perform its own verification.
+    pub fn next_chunk(&mut self) -> Result<Option<Chunk>> {
+        if self.data_remain != 0 {
+            if let Some(seek) = self.seek {
+                if self.hasher.is_some() {
+                    return Err(Error::Reader(
+                        "Cannot skip data when CRC validation is enabled".into(),
+                    ));
+                }
+
+                seek(&mut self.inner, SeekFrom::Current(self.data_remain.into()))?;
+                self.data_remain = 0;
+            } else {
+                return Err(Error::Reader(format!(
+                    "Previous chunk still has {} bytes remaining",
+                    self.data_remain,
+                )));
+            }
+        }
+
+        if self.chunk == self.header.total_chunks.get() {
+            return Ok(None);
+        }
+
+        let mut raw_chunk = RawChunk::new_zeroed();
+        self.inner.read_exact(raw_chunk.as_bytes_mut())?;
+
+        raw_chunk.validate(self.chunk, &self.header, self.block)?;
+
+        let data: ChunkData;
+
+        match raw_chunk.chunk_type.get() {
+            CHUNK_TYPE_RAW => {
+                self.data_remain =
+                    raw_chunk.total_sz.get() - u32::from(self.header.chunk_hdr_sz.get());
+
+                data = ChunkData::Data;
+            }
+            CHUNK_TYPE_FILL => {
+                let mut fill_value = little_endian::U32::new_zeroed();
+                self.inner.read_exact(fill_value.as_bytes_mut())?;
+
+                if let Some(hasher) = &mut self.hasher {
+                    hash_fill_chunk(&raw_chunk, fill_value, &self.header, hasher);
+                }
+
+                data = ChunkData::Fill(fill_value.get());
+            }
+            CHUNK_TYPE_DONT_CARE => {
+                if let Some(hasher) = &mut self.hasher {
+                    hash_fill_chunk(&raw_chunk, 0.into(), &self.header, hasher);
+                }
+
+                data = ChunkData::Hole;
+            }
+            CHUNK_TYPE_CRC32 => {
+                let mut expected = little_endian::U32::new_zeroed();
+                self.inner.read_exact(expected.as_bytes_mut())?;
+
+                if let Some(hasher) = &mut self.hasher {
+                    let actual = hasher.clone().finalize();
+
+                    if actual != expected.get() {
+                        return Err(Error::Reader(format!(
+                            "Expected checkpoint CRC32 {expected:08x}, but have {actual:08x}",
+                        )));
+                    }
+                }
+
+                data = ChunkData::Crc32(expected.get());
+            }
+            _ => unreachable!(),
+        };
+
+        let chunk = Chunk {
+            bounds: ChunkBounds {
+                start: self.block,
+                end: self.block + raw_chunk.chunk_sz.get(),
+            },
+            data,
+        };
+
+        self.chunk += 1;
+        self.block =
chunk.bounds.end;
+
+        Ok(Some(chunk))
+    }
+
+    /// Verify the final checksum and return the underlying reader.
+    pub fn finish(self) -> Result<R> {
+        if let Some(hasher) = self.hasher {
+            let expected = self.header.image_checksum.get();
+            if expected != 0 {
+                let actual = hasher.finalize();
+
+                if actual != expected {
+                    return Err(Error::Reader(format!(
+                        "Expected final CRC32 {expected:08x}, but have {actual:08x}",
+                    )));
+                }
+            }
+        }
+
+        Ok(self.inner)
+    }
+}
+
+impl<R: Read> Read for SparseReader<R> {
+    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        let to_read = buf.len().min(self.data_remain as usize);
+
+        let n = self.inner.read(&mut buf[..to_read])?;
+
+        if let Some(hasher) = &mut self.hasher {
+            hasher.update(&buf[..n]);
+        }
+
+        self.data_remain -= n as u32;
+
+        Ok(n)
+    }
+}
+
+/// A type for writing sparse files.
+pub struct SparseWriter<W: Write> {
+    inner: W,
+    header: RawHeader,
+    /// Starting block for next chunk.
+    block: u32,
+    /// Next chunk to write.
+    chunk: u32,
+    /// Number of bytes left to write for the current chunk if the chunk has
+    /// [`ChunkData::Data`].
+    data_remain: u32,
+    hasher: Hasher,
+}
+
+impl<W: Write> SparseWriter<W> {
+    /// Create a new writer from a stream. This does not require the underlying
+    /// file to be seekable, so the [`Header`] must be fully known up front.
+    pub fn new(mut inner: W, header: Header) -> Result<Self> {
+        if header.minor_version != MINOR_VERSION {
+            return Err(Error::Writer(format!(
+                "Minor version not supported for writing: {}",
+                header.minor_version,
+            )));
+        }
+
+        let header = RawHeader {
+            magic: HEADER_MAGIC.into(),
+            major_version: header.major_version.into(),
+            minor_version: header.minor_version.into(),
+            file_hdr_sz: (mem::size_of::<RawHeader>() as u16).into(),
+            chunk_hdr_sz: (mem::size_of::<RawChunk>() as u16).into(),
+            blk_sz: header.block_size.into(),
+            total_blks: header.num_blocks.into(),
+            total_chunks: header.num_chunks.into(),
+            image_checksum: header.crc32.into(),
+        };
+
+        header.validate()?;
+
+        inner.write_all(header.as_bytes())?;
+
+        Ok(Self {
+            inner,
+            header,
+            block: 0,
+            chunk: 0,
+            data_remain: 0,
+            // We include this unconditionally because we don't know if we'll
+            // get any CRC32 chunks later.
+            hasher: Hasher::new(),
+        })
+    }
+
+    /// Write the header for the next chunk. If the previous chunk had
+    /// [`ChunkData::Data`], the data must be fully written first.
+ pub fn start_chunk(&mut self, chunk: Chunk) -> Result<()> { + if self.data_remain != 0 { + return Err(Error::Writer(format!( + "Previous chunk still has {} bytes remaining", + self.data_remain, + ))); + } + + if self.chunk == self.header.total_chunks.get() { + return Err(Error::Writer("Already wrote all chunk headers".into())); + } + + if chunk.bounds.start != self.block { + return Err(Error::Writer(format!( + "Gap between end of last chunk {} and start of new chunk {}", + self.block, chunk.bounds.start, + ))); + } + + let mut raw_chunk = RawChunk { + chunk_type: match chunk.data { + ChunkData::Data => CHUNK_TYPE_RAW.into(), + ChunkData::Fill(_) => CHUNK_TYPE_FILL.into(), + ChunkData::Hole => CHUNK_TYPE_DONT_CARE.into(), + ChunkData::Crc32(_) => CHUNK_TYPE_CRC32.into(), + }, + reserved1: 0.into(), + chunk_sz: chunk.bounds.len().into(), + total_sz: 0.into(), + }; + + raw_chunk.total_sz = raw_chunk.expected_size(self.chunk, &self.header)?.into(); + + raw_chunk.validate(self.chunk, &self.header, self.block)?; + + self.chunk += 1; + self.block = chunk.bounds.end; + + self.inner.write_all(raw_chunk.as_bytes())?; + + match chunk.data { + ChunkData::Data => { + self.data_remain = + raw_chunk.total_sz.get() - u32::from(self.header.chunk_hdr_sz.get()); + } + ChunkData::Fill(fill_value) => { + self.inner.write_all(&fill_value.to_le_bytes())?; + + hash_fill_chunk( + &raw_chunk, + fill_value.into(), + &self.header, + &mut self.hasher, + ); + } + ChunkData::Hole => { + hash_fill_chunk(&raw_chunk, 0.into(), &self.header, &mut self.hasher); + } + ChunkData::Crc32(expected) => { + self.inner.write_all(&expected.to_le_bytes())?; + + let actual = self.hasher.clone().finalize(); + if actual != expected { + return Err(Error::Reader(format!( + "Expected checkpoint CRC32 {expected:08x}, but have {actual:08x}", + ))); + } + } + } + + Ok(()) + } + + /// Verify the final checksum and return the underlying writer. 
+    pub fn finish(self) -> Result<W> {
+        let expected = self.header.image_checksum.get();
+        if expected != 0 {
+            let actual = self.hasher.finalize();
+
+            if actual != expected {
+                return Err(Error::Reader(format!(
+                    "Expected final CRC32 {expected:08x}, but have {actual:08x}",
+                )));
+            }
+        }
+
+        Ok(self.inner)
+    }
+}
+
+impl<W: Write> Write for SparseWriter<W> {
+    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+        let to_write = buf.len().min(self.data_remain as usize);
+
+        let n = self.inner.write(&buf[..to_write])?;
+
+        self.hasher.update(&buf[..n]);
+
+        self.data_remain -= n as u32;
+
+        Ok(n)
+    }
+
+    fn flush(&mut self) -> io::Result<()> {
+        self.inner.flush()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{Chunk, ChunkBounds, ChunkData, ChunkList};
+
+    #[test]
+    fn chunk_list_merge() {
+        let mut list = ChunkList::new();
+
+        // Insert adjacent blocks in non-sequential order.
+        list.insert_fill(ChunkBounds { start: 1, end: 2 }, 0xaaaaaaaa);
+        list.insert_fill(ChunkBounds { start: 0, end: 1 }, 0xaaaaaaaa);
+        list.insert_fill(ChunkBounds { start: 2, end: 3 }, 0xaaaaaaaa);
+        assert_eq!(
+            list.to_chunks(),
+            vec![Chunk {
+                bounds: ChunkBounds { start: 0, end: 3 },
+                data: ChunkData::Fill(0xaaaaaaaa),
+            },]
+        );
+    }
+
+    #[test]
+    fn chunk_list_overlap() {
+        let mut list = ChunkList::new();
+
+        // Replace existing chunks with a new chunk that ends at the same block,
+        // but starts earlier.
+        list.insert_fill(ChunkBounds { start: 2, end: 3 }, 0xaaaaaaaa);
+        list.insert_fill(ChunkBounds { start: 3, end: 4 }, 0xaaaaaaaa);
+        list.insert_fill(ChunkBounds { start: 1, end: 4 }, 0xbbbbbbbb);
+        assert_eq!(
+            list.to_chunks(),
+            vec![
+                Chunk {
+                    bounds: ChunkBounds { start: 0, end: 1 },
+                    data: ChunkData::Hole,
+                },
+                Chunk {
+                    bounds: ChunkBounds { start: 1, end: 4 },
+                    data: ChunkData::Fill(0xbbbbbbbb),
+                },
+            ]
+        );
+
+        // Replace existing chunks with a new chunk that starts at the same
+        // block, but ends later.
+ list.insert_fill(ChunkBounds { start: 1, end: 5 }, 0xcccccccc); + assert_eq!( + list.to_chunks(), + vec![ + Chunk { + bounds: ChunkBounds { start: 0, end: 1 }, + data: ChunkData::Hole, + }, + Chunk { + bounds: ChunkBounds { start: 1, end: 5 }, + data: ChunkData::Fill(0xcccccccc), + }, + ] + ); + + // Replace existing chunks with a new chunk that's larger in both + // directions. + list.insert_fill(ChunkBounds { start: 0, end: 6 }, 0xdddddddd); + assert_eq!( + list.to_chunks(), + vec![Chunk { + bounds: ChunkBounds { start: 0, end: 6 }, + data: ChunkData::Fill(0xdddddddd), + },] + ); + + // Replace existing chunks with a new chunk that falls on the same + // boundaries exactly. + list.insert_fill(ChunkBounds { start: 0, end: 6 }, 0xeeeeeeee); + assert_eq!( + list.to_chunks(), + vec![Chunk { + bounds: ChunkBounds { start: 0, end: 6 }, + data: ChunkData::Fill(0xeeeeeeee), + },] + ); + } + + #[test] + fn chunk_list_split_chunk() { + let mut list = ChunkList::new(); + + // Insert a different chunk type into the middle of an existing chunk. + list.insert_fill(ChunkBounds { start: 0, end: 3 }, 0xaaaaaaaa); + list.insert_fill(ChunkBounds { start: 1, end: 2 }, 0xbbbbbbbb); + assert_eq!( + list.to_chunks(), + vec![ + Chunk { + bounds: ChunkBounds { start: 0, end: 1 }, + data: ChunkData::Fill(0xaaaaaaaa), + }, + Chunk { + bounds: ChunkBounds { start: 1, end: 2 }, + data: ChunkData::Fill(0xbbbbbbbb), + }, + Chunk { + bounds: ChunkBounds { start: 2, end: 3 }, + data: ChunkData::Fill(0xaaaaaaaa), + }, + ] + ); + + // Insert a chunk of the same type into the middle. + list.insert_fill(ChunkBounds { start: 0, end: 3 }, 0xcccccccc); + list.insert_fill(ChunkBounds { start: 1, end: 2 }, 0xcccccccc); + assert_eq!( + list.to_chunks(), + vec![Chunk { + bounds: ChunkBounds { start: 0, end: 3 }, + data: ChunkData::Fill(0xcccccccc), + },] + ); + } + + #[test] + fn chunk_list_punch_hole() { + let mut list = ChunkList::new(); + + // Punch a hole in the middle. 
+ list.insert_fill(ChunkBounds { start: 0, end: 3 }, 0xaaaaaaaa); + list.insert_hole(ChunkBounds { start: 1, end: 2 }); + assert_eq!( + list.to_chunks(), + vec![ + Chunk { + bounds: ChunkBounds { start: 0, end: 1 }, + data: ChunkData::Fill(0xaaaaaaaa), + }, + Chunk { + bounds: ChunkBounds { start: 1, end: 2 }, + data: ChunkData::Hole, + }, + Chunk { + bounds: ChunkBounds { start: 2, end: 3 }, + data: ChunkData::Fill(0xaaaaaaaa), + }, + ] + ); + + // Punch a hole at the end. The file size should not decrease. + list.insert_hole(ChunkBounds { start: 2, end: 3 }); + assert_eq!( + list.to_chunks(), + vec![ + Chunk { + bounds: ChunkBounds { start: 0, end: 1 }, + data: ChunkData::Fill(0xaaaaaaaa), + }, + Chunk { + bounds: ChunkBounds { start: 1, end: 3 }, + data: ChunkData::Hole, + }, + ] + ); + + // Make the entire file a hole. + list.insert_hole(ChunkBounds { start: 0, end: 1 }); + assert_eq!( + list.to_chunks(), + vec![Chunk { + bounds: ChunkBounds { start: 0, end: 3 }, + data: ChunkData::Hole, + },] + ); + } + + #[test] + fn chunk_list_set_len() { + let mut list = ChunkList::new(); + + // Truncate the file. + list.insert_fill(ChunkBounds { start: 0, end: 3 }, 0xaaaaaaaa); + list.set_len(2); + assert_eq!( + list.to_chunks(), + vec![Chunk { + bounds: ChunkBounds { start: 0, end: 2 }, + data: ChunkData::Fill(0xaaaaaaaa), + },] + ); + + // Expand the file. + list.set_len(3); + assert_eq!( + list.to_chunks(), + vec![ + Chunk { + bounds: ChunkBounds { start: 0, end: 2 }, + data: ChunkData::Fill(0xaaaaaaaa), + }, + Chunk { + bounds: ChunkBounds { start: 2, end: 3 }, + data: ChunkData::Hole, + }, + ] + ); + + // Clear the file. + list.set_len(0); + assert_eq!(list.to_chunks(), vec![]); + + // File size should remain the same when adding a chunk that does not + // force an expansion. 
+ list.set_len(3); + list.insert_fill(ChunkBounds { start: 0, end: 1 }, 0xbbbbbbbb); + assert_eq!( + list.to_chunks(), + vec![ + Chunk { + bounds: ChunkBounds { start: 0, end: 1 }, + data: ChunkData::Fill(0xbbbbbbbb), + }, + Chunk { + bounds: ChunkBounds { start: 1, end: 3 }, + data: ChunkData::Hole, + }, + ] + ); + } +} diff --git a/avbroot/tests/sparse.rs b/avbroot/tests/sparse.rs new file mode 100644 index 0000000..b2d3b85 --- /dev/null +++ b/avbroot/tests/sparse.rs @@ -0,0 +1,173 @@ +/* + * SPDX-FileCopyrightText: 2024 Andrew Gunnerson + * SPDX-License-Identifier: GPL-3.0-only + */ + +use std::io::{Cursor, Read, Write}; + +use avbroot::format::sparse::{ + self, Chunk, ChunkBounds, ChunkData, CrcMode, Header, SparseReader, SparseWriter, +}; + +#[derive(Clone, Copy)] +struct TestChunk { + chunk: Chunk, + data: &'static [u8], +} + +fn round_trip(block_size: u32, crc32: u32, test_chunks: &[TestChunk], sha512: &[u8; 64]) { + let num_blocks = test_chunks.iter().map(|d| d.chunk.bounds.len()).sum(); + let header = Header { + major_version: sparse::MAJOR_VERSION, + minor_version: sparse::MINOR_VERSION, + block_size, + num_blocks, + num_chunks: test_chunks.len() as u32, + crc32, + }; + + let writer = Cursor::new(Vec::new()); + let mut sparse_writer = SparseWriter::new(writer, header).unwrap(); + + for test_chunk in test_chunks { + sparse_writer.start_chunk(test_chunk.chunk).unwrap(); + + if !test_chunk.data.is_empty() { + sparse_writer.write_all(test_chunk.data).unwrap(); + } + } + + let writer = sparse_writer.finish().unwrap(); + let data = writer.into_inner(); + + assert_eq!( + ring::digest::digest(&ring::digest::SHA512, &data).as_ref(), + sha512, + ); + + let reader = Cursor::new(&data); + let mut sparse_reader = SparseReader::new(reader, CrcMode::Validate).unwrap(); + + assert_eq!(sparse_reader.header(), header); + + let mut test_chunks_iter = test_chunks.iter(); + + while let Some(chunk) = sparse_reader.next_chunk().unwrap() { + let test_chunk = 
test_chunks_iter.next().unwrap(); + + assert_eq!(chunk, test_chunk.chunk); + + if !test_chunk.data.is_empty() { + let mut buf = vec![]; + sparse_reader.read_to_end(&mut buf).unwrap(); + + assert_eq!(buf, test_chunk.data); + } + } + + assert!(test_chunks_iter.next().is_none()); +} + +#[test] +fn round_trip_full_image() { + let block_size = 8; + let file_crc32 = 0xf6e23567; + let test_chunks = [ + TestChunk { + chunk: Chunk { + bounds: ChunkBounds { start: 0, end: 1 }, + data: ChunkData::Data, + }, + data: b"\x00\x01\x02\x03\x04\x05\x06\x07", + }, + TestChunk { + chunk: Chunk { + bounds: ChunkBounds { start: 1, end: 1 }, + data: ChunkData::Crc32(0x88aa689f), + }, + data: b"", + }, + TestChunk { + chunk: Chunk { + bounds: ChunkBounds { start: 1, end: 2 }, + data: ChunkData::Fill(0x01234567), + }, + data: b"", + }, + TestChunk { + chunk: Chunk { + bounds: ChunkBounds { start: 2, end: 3 }, + data: ChunkData::Data, + }, + data: b"\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + }, + TestChunk { + chunk: Chunk { + bounds: ChunkBounds { start: 3, end: 3 }, + data: ChunkData::Crc32(0xf6e23567), + }, + data: b"", + }, + ]; + let sha512 = [ + 0x19, 0x5f, 0xa7, 0xdb, 0x18, 0xc6, 0xb9, 0x0e, 0xce, 0x4b, 0x4f, 0x35, 0x36, 0x79, 0x46, + 0x02, 0x7a, 0x45, 0x66, 0x63, 0x0e, 0xd9, 0x76, 0x93, 0x2b, 0x88, 0xe2, 0xbc, 0x0b, 0xd9, + 0x1f, 0x21, 0x51, 0x92, 0x00, 0x2e, 0xe3, 0xa2, 0xff, 0x24, 0xea, 0xef, 0x24, 0xd5, 0x24, + 0xf0, 0x46, 0xf3, 0x10, 0x32, 0xf4, 0xa6, 0x3b, 0x9d, 0xcd, 0xc5, 0x57, 0xf4, 0xc0, 0xe8, + 0x01, 0xe8, 0x1d, 0xb3, + ]; + + round_trip(block_size, file_crc32, &test_chunks, &sha512); +} + +#[test] +fn round_trip_partial_image() { + let block_size = 8; + let file_crc32 = 0; + let test_chunks = [ + TestChunk { + chunk: Chunk { + bounds: ChunkBounds { start: 0, end: 1 }, + data: ChunkData::Hole, + }, + data: b"", + }, + TestChunk { + chunk: Chunk { + bounds: ChunkBounds { start: 1, end: 2 }, + data: ChunkData::Data, + }, + data: b"\x00\x01\x02\x03\x04\x05\x06\x07", + }, + 
TestChunk { + chunk: Chunk { + bounds: ChunkBounds { start: 2, end: 3 }, + data: ChunkData::Hole, + }, + data: b"", + }, + TestChunk { + chunk: Chunk { + bounds: ChunkBounds { start: 3, end: 4 }, + data: ChunkData::Data, + }, + data: b"\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + }, + TestChunk { + chunk: Chunk { + bounds: ChunkBounds { start: 4, end: 5 }, + data: ChunkData::Hole, + }, + data: b"", + }, + ]; + let sha512 = [ + 0xee, 0x07, 0xc5, 0x4d, 0x85, 0xee, 0x69, 0x91, 0x61, 0x07, 0x10, 0xed, 0xec, 0x13, 0x5e, + 0xfb, 0xc3, 0x7d, 0xcf, 0x1f, 0x2a, 0x13, 0xf0, 0xb6, 0x85, 0xb4, 0xee, 0xe9, 0xd7, 0xa1, + 0x12, 0x79, 0x14, 0x16, 0x30, 0x7a, 0x81, 0xf9, 0x4f, 0x72, 0xb2, 0xdd, 0x33, 0xbe, 0x5d, + 0x55, 0x70, 0xa9, 0xe3, 0x94, 0x29, 0x40, 0x29, 0x8f, 0x35, 0x23, 0xf8, 0x78, 0x7f, 0xfe, + 0xd6, 0x4b, 0x60, 0x16, + ]; + + round_trip(block_size, file_crc32, &test_chunks, &sha512); +} diff --git a/deny.toml b/deny.toml index 4737d76..2c720f8 100644 --- a/deny.toml +++ b/deny.toml @@ -33,6 +33,7 @@ allow = [ "Apache-2.0", "Apache-2.0 WITH LLVM-exception", "BSD-3-Clause", + "CC0-1.0", "GPL-3.0", "ISC", "MIT", diff --git a/fuzz/src/bin/sparse.rs b/fuzz/src/bin/sparse.rs new file mode 100644 index 0000000..0d77fd4 --- /dev/null +++ b/fuzz/src/bin/sparse.rs @@ -0,0 +1,27 @@ +#[cfg(not(windows))] +mod fuzz { + use std::io::{self, Cursor}; + + use avbroot::format::sparse::{ChunkData, CrcMode, SparseReader}; + use honggfuzz::fuzz; + + pub fn main() { + loop { + fuzz!(|data: &[u8]| { + let reader = Cursor::new(data); + if let Ok(mut sparse_reader) = SparseReader::new(reader, CrcMode::Ignore) { + while let Ok(Some(chunk)) = sparse_reader.next_chunk() { + if chunk.data == ChunkData::Data { + let _ = io::copy(&mut sparse_reader, &mut io::sink()); + } + } + } + }); + } + } +} + +fn main() { + #[cfg(not(windows))] + fuzz::main(); +}