diff --git a/Cargo.lock b/Cargo.lock index 3072181..61e0be2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -131,7 +131,9 @@ dependencies = [ "cms", "const-oid", "constcat", + "crc32fast", "ctrlc", + "dlv-list", "flate2", "gf256", "hex", @@ -412,6 +414,26 @@ version = "0.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" +[[package]] +name = "const-random" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom", + "once_cell", + "tiny-keccak", +] + [[package]] name = "constcat" version = "0.5.0" @@ -461,6 +483,12 @@ version = "0.8.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + [[package]] name = "crypto-common" version = "0.1.6" @@ -552,6 +580,15 @@ dependencies = [ "subtle", ] +[[package]] +name = "dlv-list" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "442039f5147480ba31067cb00ada1adae6892028e40e45fc5de7b7df6dcc1b5f" +dependencies = [ + "const-random", +] + [[package]] name = "e2e" version = "3.6.0" @@ -1694,6 +1731,15 @@ dependencies = [ "once_cell", ] +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + [[package]] name = "tls_codec" version = "0.4.1" diff --git a/README.extra.md b/README.extra.md index d6617ac..7807c9e 100644 --- a/README.extra.md +++ b/README.extra.md @@ -286,7 +286,7 @@ All metadata slots in the newly packed LP image will be identical. ### Repacking an LP image ```bash -avbroot lp repack [-i ] [-i ]... -o [-o ]... +avbroot lp repack -i [-i ]... -o [-o ]... ``` This subcommand is logically equivalent to `avbroot lp unpack` followed by `avbroot lp pack`, except more efficient. Instead of unpacking and packing all partition images, the raw data is directly copied from the old LP image to the new LP image. @@ -340,3 +340,47 @@ avbroot payload info -i ``` This subcommand shows all of the payload header fields (which will likely be extremely long). + +## `avbroot sparse` + +This set of commands is for working with Android sparse images. All features of the file format are supported, including hole chunks and CRC32 checksums. + +### Unpacking a sparse image + +```bash +avbroot sparse unpack -i <input sparse image> -o <output raw image> +``` + +This subcommand unpacks a sparse image to a raw image. If the sparse image contains CRC32 checksums, they will be validated during unpacking. If the sparse image contains holes, the output image will be created as a native sparse file. + +Certain fastboot factory images may have multiple sparse images, like `super_1.img`, `super_2.img`, etc., where they all touch a disjoint set of regions on the same partition. These can be unpacked by running this subcommand for each sparse image and specifying the `--preserve` option along with using the same output file. This preserves the existing data in the output file when unpacking each sparse image. + +### Packing a sparse image + +```bash +avbroot sparse pack -i <input raw image> -o <output sparse image> +``` + +This subcommand packs a new sparse image from a raw image.
The default block size is 4096 bytes, which can be changed with the `--block-size` option. + +By default, this will pack the entire input file. However, on Linux, there is an optimization where all holes in the input file, if it is a native sparse file, will be stored as hole chunks instead of `0`-filled chunks in the output sparse image. + +To pack a partial sparse image, such as those used in the special fastboot factory images mentioned above, pass in `--region <start> <end>`. This option can be specified multiple times to pack multiple regions. + +Unlike AOSP's `img2simg` tool, which never writes CRC32 checksums, this subcommand will write checksums if the input file has no holes and the entire file is being packed. + +### Repacking a sparse image + +```bash +avbroot sparse repack -i <input sparse image> -o <output sparse image> +``` + +This subcommand is logically equivalent to `avbroot sparse unpack` followed by `avbroot sparse pack`, except more efficient. This is useful for roundtrip testing of avbroot's sparse file parser. + +### Showing sparse image metadata + +```bash +avbroot sparse info -i <input sparse image> +``` + +This subcommand shows the sparse image metadata, including the header and all chunks.
diff --git a/avbroot/Cargo.toml b/avbroot/Cargo.toml index 0300007..e006034 100644 --- a/avbroot/Cargo.toml +++ b/avbroot/Cargo.toml @@ -20,7 +20,9 @@ clap = { version = "4.4.1", features = ["derive"] } clap_complete = "4.4.0" cms = { version = "0.2.2", features = ["std"] } const-oid = "0.9.5" +crc32fast = "1.4.2" ctrlc = "3.4.0" +dlv-list = "0.5.2" flate2 = "1.0.27" gf256 = { version = "0.3.0", features = ["rs"] } hex = { version = "0.4.3", features = ["serde"] } diff --git a/avbroot/src/cli/args.rs b/avbroot/src/cli/args.rs index d991194..0532fca 100644 --- a/avbroot/src/cli/args.rs +++ b/avbroot/src/cli/args.rs @@ -15,7 +15,7 @@ use clap::{Parser, Subcommand, ValueEnum}; use tracing::{debug, Level}; use tracing_subscriber::fmt::{format::Writer, time::FormatTime}; -use crate::cli::{avb, boot, completion, cpio, fec, hashtree, key, lp, ota, payload}; +use crate::cli::{avb, boot, completion, cpio, fec, hashtree, key, lp, ota, payload, sparse}; #[allow(clippy::large_enum_variant)] #[derive(Debug, Subcommand)] @@ -30,6 +30,7 @@ pub enum Command { Lp(lp::LpCli), Ota(ota::OtaCli), Payload(payload::PayloadCli), + Sparse(sparse::SparseCli), /// (Deprecated: Use `avbroot ota patch` instead.) Patch(ota::PatchCli), /// (Deprecated: Use `avbroot ota extract` instead.) @@ -134,6 +135,7 @@ pub fn main(logging_initialized: &AtomicBool, cancel_signal: &AtomicBool) -> Res Command::Lp(c) => lp::lp_main(&c, cancel_signal), Command::Ota(c) => ota::ota_main(&c, cancel_signal), Command::Payload(c) => payload::payload_main(&c, cancel_signal), + Command::Sparse(c) => sparse::sparse_main(&c, cancel_signal), // Deprecated aliases. 
Command::Patch(c) => ota::patch_subcommand(&c, cancel_signal), Command::Extract(c) => ota::extract_subcommand(&c, cancel_signal), diff --git a/avbroot/src/cli/mod.rs b/avbroot/src/cli/mod.rs index 0b6371b..20e7576 100644 --- a/avbroot/src/cli/mod.rs +++ b/avbroot/src/cli/mod.rs @@ -14,3 +14,4 @@ pub mod key; pub mod lp; pub mod ota; pub mod payload; +pub mod sparse; diff --git a/avbroot/src/cli/sparse.rs b/avbroot/src/cli/sparse.rs new file mode 100644 index 0000000..643954f --- /dev/null +++ b/avbroot/src/cli/sparse.rs @@ -0,0 +1,632 @@ +/* + * SPDX-FileCopyrightText: 2024 Andrew Gunnerson + * SPDX-License-Identifier: GPL-3.0-only + */ + +use std::{ + fmt, + fs::{File, OpenOptions}, + io::{Read, Seek, SeekFrom, Write}, + ops::Range, + path::{Path, PathBuf}, + sync::atomic::AtomicBool, +}; + +use anyhow::{anyhow, bail, Context, Result}; +use clap::{Parser, Subcommand}; +use crc32fast::Hasher; +use zerocopy::{little_endian, AsBytes}; + +use crate::{ + format::{ + padding, + sparse::{ + self, Chunk, ChunkBounds, ChunkData, ChunkList, CrcMode, Header, SparseReader, + SparseWriter, + }, + }, + stream, +}; + +struct CompactView<'a, T>(&'a [T]); + +impl<'a, T: fmt::Debug> fmt::Debug for CompactView<'a, T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut list = f.debug_list(); + + for item in self.0 { + // No alternate mode for no inner newlines. 
+            list.entry(&format_args!("{item:?}"));
+        }
+
+        list.finish()
+    }
+}
+
+/// Header and chunk list of a sparse image, collected for display.
+#[derive(Clone)]
+struct Metadata {
+    header: Header,
+    chunks: Vec<Chunk>,
+}
+
+impl fmt::Debug for Metadata {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Metadata")
+            .field("header", &self.header)
+            .field("chunks", &CompactView(&self.chunks))
+            .finish()
+    }
+}
+
+/// Opens `path` for reading, attaching the path to any error.
+fn open_reader(path: &Path) -> Result<File> {
+    File::open(path).with_context(|| format!("Failed to open for reading: {path:?}"))
+}
+
+/// Opens `path` for writing, creating it if it does not exist. Existing
+/// contents are discarded only when `truncate` is true.
+fn open_writer(path: &Path, truncate: bool) -> Result<File> {
+    OpenOptions::new()
+        .write(true)
+        .create(true)
+        .truncate(truncate)
+        .open(path)
+        .with_context(|| format!("Failed to open for writing: {path:?}"))
+}
+
+/// Prints the metadata in pretty debug form unless `--quiet` was given.
+fn display_metadata(cli: &SparseCli, metadata: &Metadata) {
+    if !cli.quiet {
+        println!("{metadata:#?}");
+    }
+}
+
+/// Splits large data chunks to ensure that none exceed 64 MiB. This is not
+/// necessary in most cases, but is kept to match the behavior of AOSP's
+/// libsparse.
+///
+/// Only [`ChunkData::Data`] chunks are split. All other chunks are passed
+/// through unchanged and the relative order of the chunks is preserved. If the
+/// block size itself is larger than 64 MiB, data chunks are split into
+/// single-block chunks because a chunk cannot be smaller than one block.
+///
+/// # Panics
+///
+/// Panics if `block_size` is zero.
+fn split_chunks(chunks: &[Chunk], block_size: u32) -> Vec<Chunk> {
+    const MAX_BYTES: u32 = 64 * 1024 * 1024;
+
+    // Clamp to at least one block. Without this, a block size larger than
+    // `MAX_BYTES` yields a limit of zero and the loop below never terminates
+    // because the start of the chunk would never advance.
+    let max_blocks_per_chunk = (MAX_BYTES / block_size).max(1);
+    let mut result = Vec::with_capacity(chunks.len());
+
+    for mut chunk in chunks.iter().copied() {
+        if chunk.data == ChunkData::Data {
+            // Peel full-sized pieces off the front until the remainder fits.
+            while chunk.bounds.len() > max_blocks_per_chunk {
+                result.push(Chunk {
+                    bounds: ChunkBounds {
+                        start: chunk.bounds.start,
+                        end: chunk.bounds.start + max_blocks_per_chunk,
+                    },
+                    data: chunk.data,
+                });
+
+                chunk.bounds.start += max_blocks_per_chunk;
+            }
+        }
+
+        result.push(chunk);
+    }
+
+    result
+}
+
+/// [Linux only] Find allocated regions of the file. This avoids needing to read
+/// unused portions of the file if it is a native sparse file.
+#[cfg(any(target_os = "linux", target_os = "android"))]
+fn find_allocated_regions(
+    path: &Path,
+    reader: &mut File,
+    cancel_signal: &AtomicBool,
+) -> Result<Vec<Range<u64>>> {
+    use rustix::{fs::SeekFrom, io::Errno};
+
+    let mut regions = vec![];
+    // Byte offset at which the next search for data begins. Each iteration
+    // advances it to the end of the data region that was just found.
+    let mut end = 0;
+
+    loop {
+        stream::check_cancel(cancel_signal)?;
+
+        // Find the start of the next data region at or after `end`. The seek
+        // reports ENXIO when there is no further data, which ends the scan.
+        let start = match rustix::fs::seek(&*reader, SeekFrom::Data(end as i64)) {
+            Ok(offset) => offset,
+            Err(e) if e == Errno::NXIO => break,
+            Err(e) => return Err(e).with_context(|| format!("Failed to seek to data: {path:?}")),
+        };
+
+        // The data region ends where the next hole begins.
+        end = rustix::fs::seek(&*reader, SeekFrom::Hole(start as i64))
+            .with_context(|| format!("Failed to seek to hole: {path:?}"))?;
+
+        regions.push(start..end);
+    }
+
+    Ok(regions)
+}
+
+/// Compute chunk boundaries for the list of potentially overlapping file byte
+/// regions. If `exact_bounds` is true, then the regions must be block-aligned.
+/// Otherwise, the lower boundaries are aligned down and the upper boundaries
+/// are aligned up.
+fn get_chunks_for_regions(
+    block_size: u32,
+    file_size: u64,
+    file_regions: &[Range<u64>],
+    exact_bounds: bool,
+) -> Result<(u32, Vec<ChunkBounds>)> {
+    // `block_size` must be non-zero, otherwise the divisions below panic.
+    let block_size_64 = u64::from(block_size);
+
+    let file_blocks: u32 = (file_size / block_size_64)
+        .try_into()
+        .map_err(|_| anyhow!("File size {file_size} too large for block size {block_size}"))?;
+
+    let mut chunk_list = ChunkList::new();
+    chunk_list.set_len(file_blocks);
+
+    for region in file_regions {
+        // A reversed range would produce chunk bounds whose end precedes the
+        // start, which underflows when the length of the bounds is computed.
+        if region.start > region.end {
+            bail!("File region start is greater than end: {region:?}");
+        }
+
+        let mut start_byte = region.start;
+        let mut end_byte = region.end;
+
+        if exact_bounds {
+            if start_byte % block_size_64 != 0 || end_byte % block_size_64 != 0 {
+                bail!("File region bounds are not block-aligned: {region:?}");
+            }
+        } else {
+            start_byte = start_byte / block_size_64 * block_size_64;
+
+            // Rounding up adds at most `block_size - 1` bytes. Reject offsets
+            // for which that would exceed `u64::MAX` so that the `unwrap()`
+            // below cannot panic on user-supplied regions.
+            // NOTE(review): assumes overflow is the only way
+            // `padding::round` can fail -- confirm against its definition.
+            if end_byte.checked_add(block_size_64 - 1).is_none() {
+                bail!("Region end offset {end_byte} too large for block size {block_size}");
+            }
+
+            end_byte = padding::round(end_byte, block_size_64).unwrap();
+        }
+
+        let start_block: u32 = (start_byte / block_size_64).try_into().map_err(|_| {
+            anyhow!("Region start offset {start_byte} too large for block size {block_size}")
+        })?;
+        let end_block: u32 = (end_byte / block_size_64).try_into().map_err(|_| {
+            anyhow!("Region end offset {end_byte} too large for block size {block_size}")
+        })?;
+
+        chunk_list.insert_data(ChunkBounds {
+            start: start_block,
+            end: end_block,
+        });
+    }
+
+    // Only the bounds are needed here. The actual chunk types are determined
+    // later from the file contents.
+    let chunks = chunk_list.iter_allocated().map(|c| c.bounds).collect();
+
+    Ok((file_blocks, chunks))
+}
+
+/// Compute the sparse [`Chunk`]s needed to cover the specified regions.
+fn compute_chunks(
+    path: &Path,
+    reader: &mut File,
+    block_size: u32,
+    file_blocks: u32,
+    block_regions: &[ChunkBounds],
+    cancel_signal: &AtomicBool,
+) -> Result<(ChunkList, u32)> {
+    let mut chunk_list = ChunkList::new();
+    let mut hasher = Some(Hasher::new());
+    let mut buf = vec![0u8; block_size as usize];
+    let mut block = 0;
+
+    chunk_list.set_len(file_blocks);
+
+    for bounds in block_regions {
+        if bounds.start != block {
+            // Not contiguous so we cannot compute the checksum.
+            hasher = None;
+        }
+
+        let offset = u64::from(bounds.start) * u64::from(block_size);
+
+        reader
+            .seek(SeekFrom::Start(offset))
+            .with_context(|| format!("Failed to seek file: {path:?}"))?;
+
+        for block in *bounds {
+            stream::check_cancel(cancel_signal)?;
+
+            reader
+                .read_exact(&mut buf)
+                .with_context(|| format!("Failed to read full block: {path:?}"))?;
+
+            if let Some(h) = &mut hasher {
+                h.update(&buf);
+            }
+
+            let new_bounds = ChunkBounds {
+                start: block,
+                end: block + 1,
+            };
+
+            if buf.chunks_exact(4).all(|c| c == &buf[..4]) {
+                let fill_value = u32::from_le_bytes(buf[..4].try_into().unwrap());
+                chunk_list.insert_fill(new_bounds, fill_value);
+            } else {
+                chunk_list.insert_data(new_bounds);
+            }
+        }
+
+        block = bounds.end;
+    }
+
+    if block != file_blocks {
+        hasher = None;
+    }
+
+    let crc32 = hasher.map(|h| h.finalize()).unwrap_or_default();
+
+    Ok((chunk_list, crc32))
+}
+
+/// Unpack a sparse image into a raw image.
+///
+/// Fill chunks are expanded, data chunks are copied, hole chunks are skipped
+/// by seeking, and CRC32 chunks produce no output. The collected metadata is
+/// displayed after all chunks have been processed.
+fn unpack_subcommand(
+    sparse_cli: &SparseCli,
+    cli: &UnpackCli,
+    cancel_signal: &AtomicBool,
+) -> Result<()> {
+    let reader = open_reader(&cli.input)?;
+    let mut sparse_reader = SparseReader::new(reader, CrcMode::Validate)
+        .with_context(|| format!("Failed to read sparse file: {:?}", cli.input))?;
+
+    let mut metadata = Metadata {
+        header: sparse_reader.header(),
+        chunks: vec![],
+    };
+
+    // Sizes are computed in 64 bits because the product of two `u32` values
+    // does not necessarily fit in a `u32`.
+    let block_size = u64::from(metadata.header.block_size);
+    let expected_size = u64::from(metadata.header.num_blocks) * block_size;
+
+    let mut writer = open_writer(&cli.output, !cli.preserve)?;
+
+    // Grow the output to the full image size up front, whether or not existing
+    // data is being preserved. Seeking past the end of a file does not extend
+    // it, so an image that ends in a hole would otherwise produce a raw image
+    // that is too short.
+    let file_size = writer
+        .seek(SeekFrom::End(0))
+        .with_context(|| format!("Failed to get file size: {:?}", cli.output))?;
+
+    if file_size < expected_size {
+        writer
+            .set_len(expected_size)
+            .with_context(|| format!("Failed to set file size: {:?}", cli.output))?;
+    }
+
+    writer
+        .seek(SeekFrom::Start(0))
+        .with_context(|| format!("Failed to seek file: {:?}", cli.output))?;
+
+    while let Some(chunk) = sparse_reader
+        .next_chunk()
+        .with_context(|| format!("Failed to read chunk: {:?}", cli.input))?
+    {
+        match chunk.data {
+            ChunkData::Fill(value) => {
+                // One block's worth of the repeated little-endian fill value.
+                let fill_value = little_endian::U32::from(value);
+                let buf = vec![fill_value; metadata.header.block_size as usize / 4];
+
+                for _ in chunk.bounds {
+                    stream::check_cancel(cancel_signal)?;
+
+                    writer
+                        .write_all(buf.as_bytes())
+                        .with_context(|| format!("Failed to write data: {:?}", cli.output))?;
+                }
+            }
+            ChunkData::Data => {
+                let to_copy = u64::from(chunk.bounds.len()) * block_size;
+
+                stream::copy_n(&mut sparse_reader, &mut writer, to_copy, cancel_signal)
+                    .with_context(|| {
+                        format!("Failed to copy data: {:?} -> {:?}", cli.input, cli.output)
+                    })?;
+            }
+            ChunkData::Hole => {
+                // Holes carry no payload, so nothing bounds their byte size to
+                // 32 bits. A hole of 4 GiB or more would overflow a `u32`.
+                let hole_size = u64::from(chunk.bounds.len()) * block_size;
+                let to_skip = i64::try_from(hole_size).with_context(|| {
+                    format!("Hole size {hole_size} is too large to seek: {:?}", cli.output)
+                })?;
+
+                writer
+                    .seek(SeekFrom::Current(to_skip))
+                    .with_context(|| format!("Failed to seek file: {:?}", cli.output))?;
+            }
+            ChunkData::Crc32(_) => {}
+        }
+
+        metadata.chunks.push(chunk);
+    }
+
+    display_metadata(sparse_cli, &metadata);
+
+    sparse_reader
+        .finish()
+        .with_context(|| format!("Failed to finalize reader: {:?}", cli.input))?;
+
+    Ok(())
+}
+
+fn pack_subcommand(
+    sparse_cli: &SparseCli,
+    cli: &PackCli,
+    cancel_signal: &AtomicBool,
+) -> Result<()> {
+    if cli.block_size == 0 || cli.block_size % 4 != 0 {
+        bail!(
+            "Block size must be a non-zero multiple of 4: {}",
+            cli.block_size,
+        );
+    }
+
+    let mut reader = open_reader(&cli.input)?;
+
+    let file_size = reader
+        .seek(SeekFrom::End(0))
+        .with_context(|| format!("Failed to get file size: {:?}", cli.input))?;
+    if file_size % u64::from(cli.block_size) != 0 {
+        bail!(
+            "File size {file_size} is not a multiple of block size {}",
+            cli.block_size,
+        );
+    }
+
+    // Compute the byte regions to pack into the sparse file.
+ let (file_regions, exact_bounds) = if !cli.region.is_empty() { + let regions = cli + .region + .chunks_exact(2) + .map(|c| c[0]..c[1]) + .collect::>(); + + (regions, false) + } else { + #[cfg(any(target_os = "linux", target_os = "android"))] + { + let regions = find_allocated_regions(&cli.input, &mut reader, cancel_signal)?; + + (regions, false) + } + #[cfg(not(any(target_os = "linux", target_os = "android")))] + { + (vec![0..file_size], true) + } + }; + + // Get the file regions as non-overlapping and sorted block regions. + let (file_blocks, block_regions) = + get_chunks_for_regions(cli.block_size, file_size, &file_regions, exact_bounds)?; + + // Compute the checksum (if possible) and the list of actual chunks. + let (chunk_list, crc32) = compute_chunks( + &cli.input, + &mut reader, + cli.block_size, + file_blocks, + &block_regions, + cancel_signal, + )?; + + let chunks = split_chunks(&chunk_list.to_chunks(), cli.block_size); + let metadata = Metadata { + header: Header { + major_version: sparse::MAJOR_VERSION, + minor_version: sparse::MINOR_VERSION, + block_size: cli.block_size, + num_blocks: chunk_list.len(), + // This can't overflow because the number of chunks is always + // smaller than the number of blocks (because we don't add CRC32 + // chunks). + num_chunks: chunks.len() as u32, + // This will be zero if the regions don't span the entire file. 
+ crc32, + }, + chunks, + }; + + display_metadata(sparse_cli, &metadata); + + let writer = open_writer(&cli.output, true)?; + let mut sparse_writer = SparseWriter::new(writer, metadata.header) + .with_context(|| format!("Failed to initialize sparse file: {:?}", cli.output))?; + + for chunk in metadata.chunks { + sparse_writer + .start_chunk(chunk) + .with_context(|| format!("Failed to start chunk: {:?}", cli.output))?; + + if chunk.data == ChunkData::Data { + let offset = u64::from(chunk.bounds.start) * u64::from(cli.block_size); + + reader + .seek(SeekFrom::Start(offset)) + .with_context(|| format!("Failed to seek file: {:?}", cli.input))?; + + let to_copy = u64::from(chunk.bounds.len()) * u64::from(cli.block_size); + + stream::copy_n(&mut reader, &mut sparse_writer, to_copy, cancel_signal).with_context( + || format!("Failed to copy data: {:?} -> {:?}", cli.input, cli.output), + )?; + } + } + + sparse_writer + .finish() + .with_context(|| format!("Failed to finalize writer: {:?}", cli.output))?; + + Ok(()) +} + +fn repack_subcommand( + sparse_cli: &SparseCli, + cli: &RepackCli, + cancel_signal: &AtomicBool, +) -> Result<()> { + let reader = open_reader(&cli.input)?; + let mut sparse_reader = SparseReader::new_seekable(reader, CrcMode::Validate) + .with_context(|| format!("Failed to read sparse file: {:?}", cli.input))?; + + let mut metadata = Metadata { + header: sparse_reader.header(), + chunks: vec![], + }; + + let writer = open_writer(&cli.output, true)?; + let mut sparse_writer = SparseWriter::new(writer, metadata.header) + .with_context(|| format!("Failed to initialize sparse file: {:?}", cli.output))?; + + while let Some(chunk) = sparse_reader + .next_chunk() + .with_context(|| format!("Failed to read chunk: {:?}", cli.input))? + { + sparse_writer + .start_chunk(chunk) + .with_context(|| format!("Failed to start chunk: {:?}", cli.output))?; + + if chunk.data == ChunkData::Data { + // This cannot overflow. 
+ let to_copy = chunk.bounds.len() * metadata.header.block_size; + + stream::copy_n( + &mut sparse_reader, + &mut sparse_writer, + to_copy.into(), + cancel_signal, + ) + .with_context(|| format!("Failed to copy data: {:?} -> {:?}", cli.input, cli.output))?; + } + + metadata.chunks.push(chunk); + } + + display_metadata(sparse_cli, &metadata); + + sparse_reader + .finish() + .with_context(|| format!("Failed to finalize reader: {:?}", cli.input))?; + sparse_writer + .finish() + .with_context(|| format!("Failed to finalize writer: {:?}", cli.output))?; + + Ok(()) +} + +fn info_subcommand(sparse_cli: &SparseCli, cli: &InfoCli) -> Result<()> { + let reader = open_reader(&cli.input)?; + let mut sparse_reader = SparseReader::new_seekable(reader, CrcMode::Ignore) + .with_context(|| format!("Failed to read sparse file: {:?}", cli.input))?; + + let mut metadata = Metadata { + header: sparse_reader.header(), + chunks: vec![], + }; + + while let Some(chunk) = sparse_reader + .next_chunk() + .with_context(|| format!("Failed to read chunk: {:?}", cli.input))? + { + metadata.chunks.push(chunk); + } + + display_metadata(sparse_cli, &metadata); + + Ok(()) +} + +pub fn sparse_main(cli: &SparseCli, cancel_signal: &AtomicBool) -> Result<()> { + match &cli.command { + SparseCommand::Unpack(c) => unpack_subcommand(cli, c, cancel_signal), + SparseCommand::Pack(c) => pack_subcommand(cli, c, cancel_signal), + SparseCommand::Repack(c) => repack_subcommand(cli, c, cancel_signal), + SparseCommand::Info(c) => info_subcommand(cli, c), + } +} + +/// Unpack a sparse image. +#[derive(Debug, Parser)] +struct UnpackCli { + /// Path to input sparse image. + #[arg(short, long, value_name = "FILE", value_parser)] + input: PathBuf, + + /// Path to output raw image. + #[arg(short, long, value_name = "FILE", value_parser)] + output: PathBuf, + + /// Preserve existing data in the output file. 
+ /// + /// This is useful when unpacking multiple sparse files into a single output + /// file because they contain disjoint blocks of data. + #[arg(long)] + preserve: bool, +} + +/// Pack a sparse image. +#[derive(Debug, Parser)] +struct PackCli { + /// Path to output sparse image. + /// + /// If `--region` is not used and the input file is not a (native) sparse + /// file on Linux, then the output sparse image is written with a CRC32 + /// checksum in the header. + #[arg(short, long, value_name = "FILE", value_parser)] + output: PathBuf, + + /// Path to input raw image. + /// + /// On Linux, if this is a (native) sparse file, then the unallocated + /// sections of the file will be skipped and will be stored in the output + /// file as hole chunks. + #[arg(short, long, value_name = "FILE", value_parser)] + input: PathBuf, + + /// Block size. + #[arg(short, long, value_name = "BYTES", default_value_t = 4096)] + block_size: u32, + + /// Pack certain byte regions from the file. + /// + /// The start offset will be aligned down to the block size and the end + /// offset will be aligned up. This option can be specified any number of + /// times and in any order. Overlapping regions are allowed. + /// + /// Unused regions will be stored in the sparse file as hole chunks. + #[arg(short, long, value_names = ["START", "END"], num_args = 2)] + region: Vec, +} + +/// Repack a sparse image. +/// +/// This command is equivalent to running `unpack` and `pack`, except without +/// storing the unpacked data to disk. +#[derive(Debug, Parser)] +struct RepackCli { + /// Path to input sparse image. + #[arg(short, long, value_name = "FILE", value_parser)] + input: PathBuf, + + /// Path to output sparse image. + #[arg(short, long, value_name = "FILE", value_parser)] + output: PathBuf, +} + +/// Display sparse image metadata. +#[derive(Debug, Parser)] +struct InfoCli { + /// Path to input sparse image. 
+ #[arg(short, long, value_name = "FILE", value_parser)] + input: PathBuf, +} + +#[derive(Debug, Subcommand)] +enum SparseCommand { + Unpack(UnpackCli), + Pack(PackCli), + Repack(RepackCli), + Info(InfoCli), +} + +/// Pack, unpack, and inspect sparse images. +#[derive(Debug, Parser)] +pub struct SparseCli { + #[command(subcommand)] + command: SparseCommand, + + /// Don't print sparse image metadata. + #[arg(short, long, global = true)] + quiet: bool, +} diff --git a/avbroot/src/format/lp.rs b/avbroot/src/format/lp.rs index 47406d4..8cec2c6 100644 --- a/avbroot/src/format/lp.rs +++ b/avbroot/src/format/lp.rs @@ -166,7 +166,7 @@ const _: () = assert!(mem::size_of::() < GEOMETRY_SIZE as usize); impl fmt::Debug for RawGeometry { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("RawGeometry") - .field("magic", &format_args!("{:#08x}", self.magic.get())) + .field("magic", &format_args!("{:#010x}", self.magic.get())) .field("struct_size", &self.struct_size.get()) .field("checksum", &hex::encode(self.checksum)) .field("metadata_max_size", &self.metadata_max_size.get()) @@ -182,7 +182,7 @@ impl RawGeometry { fn validate(&self) -> Result<()> { if self.magic.get() != GEOMETRY_MAGIC { return Err(Error::Geometry(format!( - "Invalid magic: {:#08x}", + "Invalid magic: {:#010x}", self.magic.get(), ))); } @@ -332,7 +332,7 @@ struct RawHeader { impl fmt::Debug for RawHeader { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("RawHeader") - .field("magic", &format_args!("{:#08x}", self.magic.get())) + .field("magic", &format_args!("{:#010x}", self.magic.get())) .field("major_version", &self.major_version.get()) .field("minor_version", &self.minor_version.get()) .field("header_size", &self.header_size.get()) @@ -392,7 +392,7 @@ impl RawHeader { fn validate(&self, geometry: &RawGeometry) -> Result<()> { if self.magic.get() != HEADER_MAGIC { return Err(Error::Header(format!( - "Invalid magic: {:#08x}", + "Invalid magic: {:#010x}", 
self.magic.get(), ))); } diff --git a/avbroot/src/format/mod.rs b/avbroot/src/format/mod.rs index af6ffaa..4ceea9a 100644 --- a/avbroot/src/format/mod.rs +++ b/avbroot/src/format/mod.rs @@ -13,4 +13,5 @@ pub mod lp; pub mod ota; pub mod padding; pub mod payload; +pub mod sparse; pub mod verityrs; diff --git a/avbroot/src/format/sparse.rs b/avbroot/src/format/sparse.rs new file mode 100644 index 0000000..756cfcb --- /dev/null +++ b/avbroot/src/format/sparse.rs @@ -0,0 +1,1211 @@ +/* + * SPDX-FileCopyrightText: 2024 Andrew Gunnerson + * SPDX-License-Identifier: GPL-3.0-only + */ + +use std::{ + fmt, + io::{self, Read, Seek, SeekFrom, Write}, + mem, + ops::Range, +}; + +use crc32fast::Hasher; +use dlv_list::{Index, VecList}; +use thiserror::Error; +use zerocopy::{byteorder::little_endian, AsBytes, FromBytes, FromZeroes, Unaligned}; + +/// Magic value for [`RawHeader::magic`]. +const HEADER_MAGIC: u32 = 0xed26ff3a; + +/// Raw chunk type for [`RawChunk::chunk_type`]. +const CHUNK_TYPE_RAW: u16 = 0xcac1; +/// Fill chunk type for [`RawChunk::chunk_type`]. +const CHUNK_TYPE_FILL: u16 = 0xcac2; +/// Hole chunk type for [`RawChunk::chunk_type`]. +const CHUNK_TYPE_DONT_CARE: u16 = 0xcac3; +/// CRC32 chunk type for [`RawChunk::chunk_type`]. +const CHUNK_TYPE_CRC32: u16 = 0xcac4; + +/// Supported major version. +pub const MAJOR_VERSION: u16 = 1; +/// Supported minor version. +pub const MINOR_VERSION: u16 = 0; + +#[derive(Debug, Error)] +pub enum Error { + #[error("Sparse header: {0}")] + Header(String), + #[error("Sparse chunk #{0}: {1}")] + Chunk(u32, String), + #[error("Sparse reader: {0}")] + Reader(String), + #[error("Sparse writer: {0}")] + Writer(String), + #[error("I/O error")] + Io(#[from] io::Error), +} + +type Result = std::result::Result; + +/// Raw on-disk layout for the header. +#[derive(Clone, Copy, FromZeroes, FromBytes, AsBytes, Unaligned)] +#[repr(packed)] +struct RawHeader { + /// Magic value. This should be equal to [`HEADER_MAGIC`]. 
+ magic: little_endian::U32, + /// Major version. [`MAJOR_VERSION`] is the only version supported. All + /// other versions cannot be parsed. + major_version: little_endian::U16, + /// Minor version. Versions aside from [`MINOR_VERSION`] can be read, but + /// not written. + minor_version: little_endian::U16, + /// Size of this [`RawHeader`]. + file_hdr_sz: little_endian::U16, + /// Size of a [`RawChunk`]. + chunk_hdr_sz: little_endian::U16, + /// Block size in bytes. Must be a multiple of 4. + blk_sz: little_endian::U32, + /// Number of blocks when unsparsed. + total_blks: little_endian::U32, + /// Number of chunks. + total_chunks: little_endian::U32, + /// CRC32 checksum of the original data. + image_checksum: little_endian::U32, +} + +impl fmt::Debug for RawHeader { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("RawHeader") + .field("magic", &format_args!("{:#010x}", self.magic)) + .field("major_version", &self.major_version.get()) + .field("minor_version", &self.minor_version.get()) + .field("file_hdr_sz", &self.file_hdr_sz.get()) + .field("chunk_hdr_sz", &self.chunk_hdr_sz.get()) + .field("blk_sz", &self.blk_sz.get()) + .field("total_blks", &self.total_blks.get()) + .field("total_chunks", &self.total_chunks.get()) + .field( + "image_checksum", + &format_args!("{:#010x}", self.image_checksum.get()), + ) + .finish() + } +} + +impl RawHeader { + fn validate(&self) -> Result<()> { + if self.magic.get() != HEADER_MAGIC { + return Err(Error::Header(format!( + "Invalid magic: {:#010x}", + self.magic.get(), + ))); + } + + if self.major_version.get() != MAJOR_VERSION { + return Err(Error::Header(format!( + "Unsupported major version: {}", + self.major_version.get(), + ))); + } + + if self.file_hdr_sz.get() != mem::size_of::() as u16 { + return Err(Error::Header(format!( + "Invalid file header size: {}", + self.file_hdr_sz.get(), + ))); + } else if self.chunk_hdr_sz.get() != mem::size_of::() as u16 { + return Err(Error::Header(format!( + 
"Invalid chunk header size: {}", + self.chunk_hdr_sz.get(), + ))); + } + + if self.blk_sz.get() == 0 || self.blk_sz.get() % 4 != 0 { + return Err(Error::Header(format!( + "Invalid block size: {}", + self.blk_sz.get(), + ))); + } + + Ok(()) + } +} + +/// Raw on-disk layout for the chunk header. +#[derive(Clone, Copy, FromZeroes, FromBytes, AsBytes, Unaligned)] +#[repr(packed)] +struct RawChunk { + /// Chunk type. Must be [`CHUNK_TYPE_RAW`], [`CHUNK_TYPE_FILL`], + /// [`CHUNK_TYPE_DONT_CARE`], or [`CHUNK_TYPE_CRC32`]. + chunk_type: little_endian::U16, + /// Unused. + reserved1: little_endian::U16, + /// Number of unsparsed blocks this chunk represents. + chunk_sz: little_endian::U32, + /// The size in bytes of this chunk, including this [`RawChunk`]. + total_sz: little_endian::U32, +} + +impl fmt::Debug for RawChunk { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("RawChunk") + .field("chunk_type", &self.chunk_type.get()) + .field("reserved1", &format_args!("{:#010x}", self.reserved1.get())) + .field("chunk_sz", &self.chunk_sz.get()) + .field("total_sz", &self.total_sz.get()) + .finish() + } +} + +impl RawChunk { + fn expected_size(&self, index: u32, header: &RawHeader) -> Result { + let data_size = match self.chunk_type.get() { + CHUNK_TYPE_RAW => self + .chunk_sz + .get() + .checked_mul(header.blk_sz.get()) + .ok_or_else(|| { + Error::Chunk( + index, + format!( + "Chunk size overflow: {} * {}", + self.chunk_sz.get(), + header.blk_sz.get(), + ), + ) + })?, + CHUNK_TYPE_FILL | CHUNK_TYPE_CRC32 => 4, + CHUNK_TYPE_DONT_CARE => 0, + t => return Err(Error::Chunk(index, format!("Invalid chunk type: {t}"))), + }; + + data_size + .checked_add(mem::size_of::() as u32) + .ok_or_else(|| Error::Chunk(index, format!("Data size too large: {data_size}"))) + } + + fn validate(&self, index: u32, header: &RawHeader, start_block: u32) -> Result<()> { + let end_block = start_block + .checked_add(self.chunk_sz.get()) + .ok_or_else(|| { + Error::Chunk( + 
index, + format!( + "Block count overflow: {start_block} + {}", + self.chunk_sz.get(), + ), + ) + })?; + + if end_block > header.total_blks.get() { + return Err(Error::Chunk( + index, + format!( + "End block {end_block} exceeds total blocks {}", + header.total_blks.get(), + ), + ))?; + } + + if self.chunk_type.get() == CHUNK_TYPE_CRC32 && self.chunk_sz.get() != 0 { + return Err(Error::Chunk( + index, + format!( + "CRC32 chunk has non-zero blocks: {:?}", + start_block..end_block, + ), + )); + } + + let expected_size = self.expected_size(index, header)?; + + if expected_size != self.total_sz.get() { + return Err(Error::Chunk( + index, + format!( + "Expected total size {expected_size}, but have {}", + self.total_sz.get(), + ), + )); + } + + Ok(()) + } +} + +/// Sparse file header. +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct Header { + /// Major version. [`MAJOR_VERSION`] is the only version supported. All + /// other versions cannot be parsed. + pub major_version: u16, + /// Minor version. Versions aside from [`MINOR_VERSION`] can be read, but + /// not written. + pub minor_version: u16, + /// Block size in bytes. Must be a multiple of 4. + pub block_size: u32, + /// Number of blocks when unsparsed. + pub num_blocks: u32, + /// Number of chunks. + pub num_chunks: u32, + /// CRC32 checksum of the original data. + pub crc32: u32, +} + +impl fmt::Debug for Header { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Header") + .field("major_version", &self.major_version) + .field("minor_version", &self.minor_version) + .field("block_size", &self.block_size) + .field("num_blocks", &self.num_blocks) + .field("num_chunks", &self.num_chunks) + .field("crc32", &format_args!("{:#010x}", self.crc32)) + .finish() + } +} + +/// Half-open range indicating the block range that a chunk covers. +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct ChunkBounds { + /// Starting block (inclusive). + pub start: u32, + /// Ending block (exclusive). 
+ pub end: u32, +} + +impl fmt::Debug for ChunkBounds { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}..{}", self.start, self.end) + } +} + +impl IntoIterator for ChunkBounds { + type Item = u32; + + type IntoIter = Range; + + fn into_iter(self) -> Self::IntoIter { + self.start..self.end + } +} + +impl ChunkBounds { + /// Length in blocks. + #[allow(clippy::len_without_is_empty)] + pub fn len(&self) -> u32 { + self.end - self.start + } +} + +/// The type of data contained in a chunk. +#[derive(Clone, Copy, PartialEq, Eq)] +pub enum ChunkData { + /// The chunk is filled with raw data. + Data, + /// The chunk is filled with repeating patterns of the specified integer + /// encoded in little-endian. + Fill(u32), + /// The chunk is a hole and does not represent useful or valid data. + Hole, + /// The chunk is a CRC32 checksum. This does not represent actual data but + /// serves as a checkpoint for validating the current checksum while in the + /// middle of the sparse file. + Crc32(u32), +} + +impl fmt::Debug for ChunkData { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Data => write!(f, "Data"), + Self::Fill(value) => f + .debug_tuple("Fill") + .field(&format_args!("{value:#010x}")) + .finish(), + Self::Hole => write!(f, "Hole"), + Self::Crc32(checksum) => f + .debug_tuple("Crc32") + .field(&format_args!("{checksum:#010x}")) + .finish(), + } + } +} + +/// A type that represents a contiguous list of blocks and the type of data or +/// metadata they contain. +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct Chunk { + pub bounds: ChunkBounds, + pub data: ChunkData, +} + +impl fmt::Debug for Chunk { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Chunk") + .field("bounds", &self.bounds) + .field("data", &format_args!("{:?}", self.data)) + .finish() + } +} + +/// A type for computing the minimal number of chunks for storing some given +/// data. 
Adding chunks sequentially is most efficient, though chunks can be +/// added in any order. Adding a new chunk that overlaps an existing chunk will +/// remove, truncate, or split the existing chunk accordingly. +#[derive(Clone, Debug, Default)] +pub struct ChunkList { + chunks: VecList, + last_used: Option>, + size: u32, +} + +impl ChunkList { + pub fn new() -> Self { + Self::default() + } + + /// Split the previous chunk if its bounds contain the specified chunk. + fn split_prev(&mut self, index: Index) { + let Some(prev_index) = self.chunks.get_previous_index(index) else { + return; + }; + + let cur = *self.chunks.get(index).unwrap(); + let prev = self.chunks.get_mut(prev_index).unwrap(); + + debug_assert!(prev.bounds.start <= cur.bounds.start); + + if prev.bounds.end > cur.bounds.end { + let new_chunk = Chunk { + bounds: ChunkBounds { + start: cur.bounds.end, + end: prev.bounds.end, + }, + data: prev.data, + }; + + prev.bounds.end = cur.bounds.start; + self.chunks.insert_after(index, new_chunk); + } + } + + /// Merge the chunk at the specified index downwards, i.e. into the chunks + /// that precede it in the list, until there are no more mergeable chunks. + /// Returns the index of the new chunk that contains the original chunk. + fn merge_down(&mut self, mut index: Index) -> Index { + while let Some(prev_index) = self.chunks.get_previous_index(index) { + let cur = *self.chunks.get(index).unwrap(); + let prev = self.chunks.get_mut(prev_index).unwrap(); + + debug_assert!(prev.bounds.start <= cur.bounds.start); + + if prev.bounds.end < cur.bounds.start { + // There's a gap. + break; + } else if cur.bounds.start <= prev.bounds.start { + // Current chunk completely overlaps the previous chunk, so + // remove the previous chunk. + self.chunks.remove(prev_index); + continue; + } else if cur.bounds.start < prev.bounds.end { + // Current chunk partially overlaps the previous chunk, so + // truncate the previous chunk. + prev.bounds.end = cur.bounds.start; + } + + // If the data is compatible, then merge the chunks. 
+ if cur.data == prev.data { + prev.bounds.end = cur.bounds.end; + self.chunks.remove(index); + index = prev_index; + } + + break; + } + + index + } + + /// Merge the chunk at the specified index upwards, i.e. into the chunks + /// that follow it in the list, until there are no more mergeable chunks. + /// Returns the index of the new chunk that contains the original chunk. + fn merge_up(&mut self, mut index: Index) -> Index { + while let Some(next_index) = self.chunks.get_next_index(index) { + let cur = *self.chunks.get(index).unwrap(); + let next = self.chunks.get_mut(next_index).unwrap(); + + debug_assert!(cur.bounds.start <= next.bounds.start); + + if cur.bounds.end < next.bounds.start { + // There's a gap. + break; + } else if cur.bounds.end >= next.bounds.end { + // Current chunk completely overlaps the next chunk, so remove + // the next chunk. + self.chunks.remove(next_index); + continue; + } else if cur.bounds.end > next.bounds.start { + // Current chunk partially overlaps the next chunk, so truncate + // the next chunk. + next.bounds.start = cur.bounds.end; + } + + // If the data is compatible, then merge the chunks. + if cur.data == next.data { + next.bounds.start = cur.bounds.start; + self.chunks.remove(index); + index = next_index; + } + + break; + } + + index + } + + /// Add the specified chunk into the list, removing, truncating, splitting, + /// or merging chunks as needed. Returns the index of the chunk that + /// contains the original chunk. + fn add_chunk(&mut self, chunk: Chunk) -> Index { + // Trivial case: adding the first chunk. + if self.chunks.is_empty() { + let index = self.chunks.push_back(chunk); + self.last_used = Some(index); + self.size = self.size.max(chunk.bounds.end); + return index; + } + + // Find the chunk to insert before. We save the last used index to + // optimize for sequential insertion and avoid needing to search the + // entire list every time. 
+ let mut insert_before = self.chunks.front_index(); + + if let Some(last_used) = self.last_used { + if chunk.bounds.start >= self.chunks.get(last_used).unwrap().bounds.start { + // The new chunk starts at or after the last used chunk. + insert_before = Some(last_used); + } + } + + while let Some(index) = insert_before { + if self.chunks.get(index).unwrap().bounds.start >= chunk.bounds.start { + break; + } + + insert_before = self.chunks.get_next_index(index); + } + + let mut chunk_index = if let Some(index) = insert_before { + self.chunks.insert_before(index, chunk) + } else { + self.chunks.push_back(chunk) + }; + + // Split the previous chunk if it fully contains the new chunk. + self.split_prev(chunk_index); + + // Merge with adjacent chunks if compatible. + chunk_index = self.merge_up(chunk_index); + chunk_index = self.merge_down(chunk_index); + + self.last_used = Some(chunk_index); + self.size = self.size.max(chunk.bounds.end); + + chunk_index + } + + /// Insert actual data at the specified region. + pub fn insert_data(&mut self, bounds: ChunkBounds) { + self.add_chunk(Chunk { + bounds, + data: ChunkData::Data, + }); + } + + /// Insert a fill chunk at the specified region. The fill value is encoded + /// in little-endian. + pub fn insert_fill(&mut self, bounds: ChunkBounds, fill_value: u32) { + self.add_chunk(Chunk { + bounds, + data: ChunkData::Fill(fill_value), + }); + } + + /// Punch a hole at the specified region. If a hole is punched at the end + /// of the file, the file size does not decrease. + pub fn insert_hole(&mut self, bounds: ChunkBounds) { + let index = self.add_chunk(Chunk { + bounds, + data: ChunkData::Hole, + }); + + // Special case: we don't actually store holes. + self.last_used = self.chunks.get_previous_index(index); + self.chunks.remove(index); + } + + /// Get the file size in blocks. + #[allow(clippy::len_without_is_empty)] + pub fn len(&self) -> u32 { + self.size + } + + /// Set the file size in blocks. 
This automatically increases when adding a + /// new chunk beyond this bound. + pub fn set_len(&mut self, size: u32) { + if size < self.size { + self.insert_hole(ChunkBounds { + start: size, + end: self.size, + }); + } + + self.size = size; + } + + /// Get the list of chunks, including all holes. + pub fn to_chunks(&self) -> Vec { + let mut result = Vec::with_capacity(self.chunks.len()); + let mut block = 0; + + for chunk in &self.chunks { + if chunk.bounds.start != block { + result.push(Chunk { + bounds: ChunkBounds { + start: block, + end: chunk.bounds.start, + }, + data: ChunkData::Hole, + }); + } + + result.push(*chunk); + + block = chunk.bounds.end; + } + + if block != self.size { + result.push(Chunk { + bounds: ChunkBounds { + start: block, + end: self.size, + }, + data: ChunkData::Hole, + }); + } + + result + } + + /// Iterate through allocated chunks, which excludes holes. + pub fn iter_allocated(&self) -> impl Iterator + '_ { + self.chunks.iter().copied() + } +} + +/// Whether to validate CRC32 checksums. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum CrcMode { + Validate, + Ignore, +} + +/// Hash what a fill chunk's contents would be if it were unsparsed. +fn hash_fill_chunk( + raw_chunk: &RawChunk, + fill_value: little_endian::U32, + raw_header: &RawHeader, + hasher: &mut Hasher, +) { + let buf = [fill_value; 1024]; + let mut remain = u64::from(raw_chunk.chunk_sz) * u64::from(raw_header.blk_sz); + + while remain > 0 { + let n = remain.min(buf.as_bytes().len() as u64) as usize; + hasher.update(&buf.as_bytes()[..n]); + remain -= n as u64; + } +} + +/// A type for reading sparse files. +pub struct SparseReader { + inner: R, + seek: Option io::Result>, + header: RawHeader, + /// Starting block for next chunk. + block: u32, + /// Next chunk to read. + chunk: u32, + /// Number of bytes left to read for the current chunk if the chunk has + /// [`ChunkData::Data`]. 
+ data_remain: u32, + hasher: Option, +} + +impl SparseReader { + /// Create a new reader from a seekable file. This allows data chunks to be + /// efficiently skipped without reading them. + pub fn new_seekable(inner: R, crc_mode: CrcMode) -> Result { + let mut result = Self::new(inner, crc_mode)?; + result.seek = Some(Seek::seek); + Ok(result) + } +} + +impl SparseReader { + /// Create a new reader from a stream. This cannot efficiently skip reading + /// data chunks if they are not needed. If the underlying file is seekable + /// and skipping chunks is needed, use [`Self::new_seekable`] instead. + pub fn new(mut inner: R, crc_mode: CrcMode) -> Result { + let mut header = RawHeader::new_zeroed(); + inner.read_exact(header.as_bytes_mut())?; + + header.validate()?; + + Ok(Self { + inner, + seek: None, + header, + block: 0, + chunk: 0, + data_remain: 0, + hasher: match crc_mode { + CrcMode::Validate => Some(Hasher::new()), + CrcMode::Ignore => None, + }, + }) + } + + /// Get the sparse file header. + pub fn header(&self) -> Header { + Header { + major_version: self.header.major_version.get(), + minor_version: self.header.minor_version.get(), + block_size: self.header.blk_sz.get(), + num_blocks: self.header.total_blks.get(), + num_chunks: self.header.total_chunks.get(), + crc32: self.header.image_checksum.get(), + } + } + + /// Read the header for the next chunk. If the previous chunk had + /// [`ChunkData::Data`], the data must be fully read first unless the + /// reader is seekable and CRC validation is disabled. If the last chunk has + /// already been read, then [`None`] is returned. + /// + /// For chunks with [`ChunkData::Crc32`], if CRC validation is enabled, the + /// checksum will have already been verified. The caller does not need to + /// perform its own verification. 
+ pub fn next_chunk(&mut self) -> Result> { + if self.data_remain != 0 { + if let Some(seek) = self.seek { + if self.hasher.is_some() { + return Err(Error::Reader( + "Cannot skip data when CRC validation is enabled".into(), + )); + } + + seek(&mut self.inner, SeekFrom::Current(self.data_remain.into()))?; + self.data_remain = 0; + } else { + return Err(Error::Reader(format!( + "Previous chunk still has {} bytes remaining", + self.data_remain, + ))); + } + } + + if self.chunk == self.header.total_chunks.get() { + return Ok(None); + } + + let mut raw_chunk = RawChunk::new_zeroed(); + self.inner.read_exact(raw_chunk.as_bytes_mut())?; + + raw_chunk.validate(self.chunk, &self.header, self.block)?; + + let data: ChunkData; + + match raw_chunk.chunk_type.get() { + CHUNK_TYPE_RAW => { + self.data_remain = + raw_chunk.total_sz.get() - u32::from(self.header.chunk_hdr_sz.get()); + + data = ChunkData::Data; + } + CHUNK_TYPE_FILL => { + let mut fill_value = little_endian::U32::new_zeroed(); + self.inner.read_exact(fill_value.as_bytes_mut())?; + + if let Some(hasher) = &mut self.hasher { + hash_fill_chunk(&raw_chunk, fill_value, &self.header, hasher); + } + + data = ChunkData::Fill(fill_value.get()); + } + CHUNK_TYPE_DONT_CARE => { + if let Some(hasher) = &mut self.hasher { + hash_fill_chunk(&raw_chunk, 0.into(), &self.header, hasher); + } + + data = ChunkData::Hole; + } + CHUNK_TYPE_CRC32 => { + let mut expected = little_endian::U32::new_zeroed(); + self.inner.read_exact(expected.as_bytes_mut())?; + + if let Some(hasher) = &mut self.hasher { + let actual = hasher.clone().finalize(); + + if actual != expected.get() { + return Err(Error::Reader(format!( + "Expected checkpoint CRC32 {expected:08x}, but have {actual:08x}", + ))); + } + } + + data = ChunkData::Crc32(expected.get()); + } + _ => unreachable!(), + }; + + let chunk = Chunk { + bounds: ChunkBounds { + start: self.block, + end: self.block + raw_chunk.chunk_sz.get(), + }, + data, + }; + + self.chunk += 1; + self.block = 
chunk.bounds.end; + + Ok(Some(chunk)) + } + + /// Verify the final checksum and return the underlying reader. + pub fn finish(self) -> Result { + if let Some(hasher) = self.hasher { + let expected = self.header.image_checksum.get(); + if expected != 0 { + let actual = hasher.finalize(); + + if actual != expected { + return Err(Error::Reader(format!( + "Expected final CRC32 {expected:08x}, but have {actual:08x}", + ))); + } + } + } + + Ok(self.inner) + } +} + +impl Read for SparseReader { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + let to_read = buf.len().min(self.data_remain as usize); + + let n = self.inner.read(&mut buf[..to_read])?; + + if let Some(hasher) = &mut self.hasher { + hasher.update(&buf[..n]); + } + + self.data_remain -= n as u32; + + Ok(n) + } +} + +/// A type for writing sparse files. +pub struct SparseWriter { + inner: W, + header: RawHeader, + /// Starting block for next chunk. + block: u32, + /// Next chunk to write. + chunk: u32, + /// Number of bytes left to write for the current chunk if the chunk has + /// [`ChunkData::Data`]. + data_remain: u32, + hasher: Hasher, +} + +impl SparseWriter { + /// Create a new writer from a stream. This does not require the underlying + /// file to be seekable, so the [`Header`] must be fully known up front. 
+ pub fn new(mut inner: W, header: Header) -> Result { + if header.minor_version != MINOR_VERSION { + return Err(Error::Writer(format!( + "Minor version not supported for writing: {}", + header.minor_version, + ))); + } + + let header = RawHeader { + magic: HEADER_MAGIC.into(), + major_version: header.major_version.into(), + minor_version: header.minor_version.into(), + file_hdr_sz: (mem::size_of::() as u16).into(), + chunk_hdr_sz: (mem::size_of::() as u16).into(), + blk_sz: header.block_size.into(), + total_blks: header.num_blocks.into(), + total_chunks: header.num_chunks.into(), + image_checksum: header.crc32.into(), + }; + + header.validate()?; + + inner.write_all(header.as_bytes())?; + + Ok(Self { + inner, + header, + block: 0, + chunk: 0, + data_remain: 0, + // We include this unconditionally because we don't know if we'll + // get any CRC32 chunks later. + hasher: Hasher::new(), + }) + } + + /// Write the header for the next chunk. If the previous chunk had + /// [`ChunkData::Data`], the data must be fully written first. 
+ pub fn start_chunk(&mut self, chunk: Chunk) -> Result<()> { + if self.data_remain != 0 { + return Err(Error::Writer(format!( + "Previous chunk still has {} bytes remaining", + self.data_remain, + ))); + } + + if self.chunk == self.header.total_chunks.get() { + return Err(Error::Writer("Already wrote all chunk headers".into())); + } + + if chunk.bounds.start != self.block { + return Err(Error::Writer(format!( + "Gap between end of last chunk {} and start of new chunk {}", + self.block, chunk.bounds.start, + ))); + } + + let mut raw_chunk = RawChunk { + chunk_type: match chunk.data { + ChunkData::Data => CHUNK_TYPE_RAW.into(), + ChunkData::Fill(_) => CHUNK_TYPE_FILL.into(), + ChunkData::Hole => CHUNK_TYPE_DONT_CARE.into(), + ChunkData::Crc32(_) => CHUNK_TYPE_CRC32.into(), + }, + reserved1: 0.into(), + chunk_sz: chunk.bounds.len().into(), + total_sz: 0.into(), + }; + + raw_chunk.total_sz = raw_chunk.expected_size(self.chunk, &self.header)?.into(); + + raw_chunk.validate(self.chunk, &self.header, self.block)?; + + self.chunk += 1; + self.block = chunk.bounds.end; + + self.inner.write_all(raw_chunk.as_bytes())?; + + match chunk.data { + ChunkData::Data => { + self.data_remain = + raw_chunk.total_sz.get() - u32::from(self.header.chunk_hdr_sz.get()); + } + ChunkData::Fill(fill_value) => { + self.inner.write_all(&fill_value.to_le_bytes())?; + + hash_fill_chunk( + &raw_chunk, + fill_value.into(), + &self.header, + &mut self.hasher, + ); + } + ChunkData::Hole => { + hash_fill_chunk(&raw_chunk, 0.into(), &self.header, &mut self.hasher); + } + ChunkData::Crc32(expected) => { + self.inner.write_all(&expected.to_le_bytes())?; + + let actual = self.hasher.clone().finalize(); + if actual != expected { + return Err(Error::Reader(format!( + "Expected checkpoint CRC32 {expected:08x}, but have {actual:08x}", + ))); + } + } + } + + Ok(()) + } + + /// Verify the final checksum and return the underlying writer. 
+ pub fn finish(self) -> Result { + let expected = self.header.image_checksum.get(); + if expected != 0 { + let actual = self.hasher.finalize(); + + if actual != expected { + return Err(Error::Reader(format!( + "Expected final CRC32 {expected:08x}, but have {actual:08x}", + ))); + } + } + + Ok(self.inner) + } +} + +impl Write for SparseWriter { + fn write(&mut self, buf: &[u8]) -> io::Result { + let to_write = buf.len().min(self.data_remain as usize); + + let n = self.inner.write(&buf[..to_write])?; + + self.hasher.update(&buf[..n]); + + self.data_remain -= n as u32; + + Ok(n) + } + + fn flush(&mut self) -> io::Result<()> { + self.inner.flush() + } +} + +#[cfg(test)] +mod tests { + use super::{Chunk, ChunkBounds, ChunkData, ChunkList}; + + #[test] + fn chunk_list_merge() { + let mut list = ChunkList::new(); + + // Insert adjacent blocks in non-sequential order. + list.insert_fill(ChunkBounds { start: 1, end: 2 }, 0xaaaaaaaa); + list.insert_fill(ChunkBounds { start: 0, end: 1 }, 0xaaaaaaaa); + list.insert_fill(ChunkBounds { start: 2, end: 3 }, 0xaaaaaaaa); + assert_eq!( + list.to_chunks(), + vec![Chunk { + bounds: ChunkBounds { start: 0, end: 3 }, + data: ChunkData::Fill(0xaaaaaaaa), + },] + ); + } + + #[test] + fn chunk_list_overlap() { + let mut list = ChunkList::new(); + + // Replace existing chunks with a new chunk that ends at the same block, + // but starts earlier. + list.insert_fill(ChunkBounds { start: 2, end: 3 }, 0xaaaaaaaa); + list.insert_fill(ChunkBounds { start: 3, end: 4 }, 0xaaaaaaaa); + list.insert_fill(ChunkBounds { start: 1, end: 4 }, 0xbbbbbbbb); + assert_eq!( + list.to_chunks(), + vec![ + Chunk { + bounds: ChunkBounds { start: 0, end: 1 }, + data: ChunkData::Hole, + }, + Chunk { + bounds: ChunkBounds { start: 1, end: 4 }, + data: ChunkData::Fill(0xbbbbbbbb), + }, + ] + ); + + // Replace existing chunks with a new chunk that starts at the same + // block, but ends later. 
+ list.insert_fill(ChunkBounds { start: 1, end: 5 }, 0xcccccccc); + assert_eq!( + list.to_chunks(), + vec![ + Chunk { + bounds: ChunkBounds { start: 0, end: 1 }, + data: ChunkData::Hole, + }, + Chunk { + bounds: ChunkBounds { start: 1, end: 5 }, + data: ChunkData::Fill(0xcccccccc), + }, + ] + ); + + // Replace existing chunks with a new chunk that's larger in both + // directions. + list.insert_fill(ChunkBounds { start: 0, end: 6 }, 0xdddddddd); + assert_eq!( + list.to_chunks(), + vec![Chunk { + bounds: ChunkBounds { start: 0, end: 6 }, + data: ChunkData::Fill(0xdddddddd), + },] + ); + + // Replace existing chunks with a new chunk that falls on the same + // boundaries exactly. + list.insert_fill(ChunkBounds { start: 0, end: 6 }, 0xeeeeeeee); + assert_eq!( + list.to_chunks(), + vec![Chunk { + bounds: ChunkBounds { start: 0, end: 6 }, + data: ChunkData::Fill(0xeeeeeeee), + },] + ); + } + + #[test] + fn chunk_list_split_chunk() { + let mut list = ChunkList::new(); + + // Insert a different chunk type into the middle of an existing chunk. + list.insert_fill(ChunkBounds { start: 0, end: 3 }, 0xaaaaaaaa); + list.insert_fill(ChunkBounds { start: 1, end: 2 }, 0xbbbbbbbb); + assert_eq!( + list.to_chunks(), + vec![ + Chunk { + bounds: ChunkBounds { start: 0, end: 1 }, + data: ChunkData::Fill(0xaaaaaaaa), + }, + Chunk { + bounds: ChunkBounds { start: 1, end: 2 }, + data: ChunkData::Fill(0xbbbbbbbb), + }, + Chunk { + bounds: ChunkBounds { start: 2, end: 3 }, + data: ChunkData::Fill(0xaaaaaaaa), + }, + ] + ); + + // Insert a chunk of the same type into the middle. + list.insert_fill(ChunkBounds { start: 0, end: 3 }, 0xcccccccc); + list.insert_fill(ChunkBounds { start: 1, end: 2 }, 0xcccccccc); + assert_eq!( + list.to_chunks(), + vec![Chunk { + bounds: ChunkBounds { start: 0, end: 3 }, + data: ChunkData::Fill(0xcccccccc), + },] + ); + } + + #[test] + fn chunk_list_punch_hole() { + let mut list = ChunkList::new(); + + // Punch a hole in the middle. 
+ list.insert_fill(ChunkBounds { start: 0, end: 3 }, 0xaaaaaaaa); + list.insert_hole(ChunkBounds { start: 1, end: 2 }); + assert_eq!( + list.to_chunks(), + vec![ + Chunk { + bounds: ChunkBounds { start: 0, end: 1 }, + data: ChunkData::Fill(0xaaaaaaaa), + }, + Chunk { + bounds: ChunkBounds { start: 1, end: 2 }, + data: ChunkData::Hole, + }, + Chunk { + bounds: ChunkBounds { start: 2, end: 3 }, + data: ChunkData::Fill(0xaaaaaaaa), + }, + ] + ); + + // Punch a hole at the end. The file size should not decrease. + list.insert_hole(ChunkBounds { start: 2, end: 3 }); + assert_eq!( + list.to_chunks(), + vec![ + Chunk { + bounds: ChunkBounds { start: 0, end: 1 }, + data: ChunkData::Fill(0xaaaaaaaa), + }, + Chunk { + bounds: ChunkBounds { start: 1, end: 3 }, + data: ChunkData::Hole, + }, + ] + ); + + // Make the entire file a hole. + list.insert_hole(ChunkBounds { start: 0, end: 1 }); + assert_eq!( + list.to_chunks(), + vec![Chunk { + bounds: ChunkBounds { start: 0, end: 3 }, + data: ChunkData::Hole, + },] + ); + } + + #[test] + fn chunk_list_set_len() { + let mut list = ChunkList::new(); + + // Truncate the file. + list.insert_fill(ChunkBounds { start: 0, end: 3 }, 0xaaaaaaaa); + list.set_len(2); + assert_eq!( + list.to_chunks(), + vec![Chunk { + bounds: ChunkBounds { start: 0, end: 2 }, + data: ChunkData::Fill(0xaaaaaaaa), + },] + ); + + // Expand the file. + list.set_len(3); + assert_eq!( + list.to_chunks(), + vec![ + Chunk { + bounds: ChunkBounds { start: 0, end: 2 }, + data: ChunkData::Fill(0xaaaaaaaa), + }, + Chunk { + bounds: ChunkBounds { start: 2, end: 3 }, + data: ChunkData::Hole, + }, + ] + ); + + // Clear the file. + list.set_len(0); + assert_eq!(list.to_chunks(), vec![]); + + // File size should remain the same when adding a chunk that does not + // force an expansion. 
+ list.set_len(3); + list.insert_fill(ChunkBounds { start: 0, end: 1 }, 0xbbbbbbbb); + assert_eq!( + list.to_chunks(), + vec![ + Chunk { + bounds: ChunkBounds { start: 0, end: 1 }, + data: ChunkData::Fill(0xbbbbbbbb), + }, + Chunk { + bounds: ChunkBounds { start: 1, end: 3 }, + data: ChunkData::Hole, + }, + ] + ); + } +} diff --git a/avbroot/tests/sparse.rs b/avbroot/tests/sparse.rs new file mode 100644 index 0000000..b2d3b85 --- /dev/null +++ b/avbroot/tests/sparse.rs @@ -0,0 +1,173 @@ +/* + * SPDX-FileCopyrightText: 2024 Andrew Gunnerson + * SPDX-License-Identifier: GPL-3.0-only + */ + +use std::io::{Cursor, Read, Write}; + +use avbroot::format::sparse::{ + self, Chunk, ChunkBounds, ChunkData, CrcMode, Header, SparseReader, SparseWriter, +}; + +#[derive(Clone, Copy)] +struct TestChunk { + chunk: Chunk, + data: &'static [u8], +} + +fn round_trip(block_size: u32, crc32: u32, test_chunks: &[TestChunk], sha512: &[u8; 64]) { + let num_blocks = test_chunks.iter().map(|d| d.chunk.bounds.len()).sum(); + let header = Header { + major_version: sparse::MAJOR_VERSION, + minor_version: sparse::MINOR_VERSION, + block_size, + num_blocks, + num_chunks: test_chunks.len() as u32, + crc32, + }; + + let writer = Cursor::new(Vec::new()); + let mut sparse_writer = SparseWriter::new(writer, header).unwrap(); + + for test_chunk in test_chunks { + sparse_writer.start_chunk(test_chunk.chunk).unwrap(); + + if !test_chunk.data.is_empty() { + sparse_writer.write_all(test_chunk.data).unwrap(); + } + } + + let writer = sparse_writer.finish().unwrap(); + let data = writer.into_inner(); + + assert_eq!( + ring::digest::digest(&ring::digest::SHA512, &data).as_ref(), + sha512, + ); + + let reader = Cursor::new(&data); + let mut sparse_reader = SparseReader::new(reader, CrcMode::Validate).unwrap(); + + assert_eq!(sparse_reader.header(), header); + + let mut test_chunks_iter = test_chunks.iter(); + + while let Some(chunk) = sparse_reader.next_chunk().unwrap() { + let test_chunk = 
test_chunks_iter.next().unwrap(); + + assert_eq!(chunk, test_chunk.chunk); + + if !test_chunk.data.is_empty() { + let mut buf = vec![]; + sparse_reader.read_to_end(&mut buf).unwrap(); + + assert_eq!(buf, test_chunk.data); + } + } + + assert!(test_chunks_iter.next().is_none()); +} + +#[test] +fn round_trip_full_image() { + let block_size = 8; + let file_crc32 = 0xf6e23567; + let test_chunks = [ + TestChunk { + chunk: Chunk { + bounds: ChunkBounds { start: 0, end: 1 }, + data: ChunkData::Data, + }, + data: b"\x00\x01\x02\x03\x04\x05\x06\x07", + }, + TestChunk { + chunk: Chunk { + bounds: ChunkBounds { start: 1, end: 1 }, + data: ChunkData::Crc32(0x88aa689f), + }, + data: b"", + }, + TestChunk { + chunk: Chunk { + bounds: ChunkBounds { start: 1, end: 2 }, + data: ChunkData::Fill(0x01234567), + }, + data: b"", + }, + TestChunk { + chunk: Chunk { + bounds: ChunkBounds { start: 2, end: 3 }, + data: ChunkData::Data, + }, + data: b"\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + }, + TestChunk { + chunk: Chunk { + bounds: ChunkBounds { start: 3, end: 3 }, + data: ChunkData::Crc32(0xf6e23567), + }, + data: b"", + }, + ]; + let sha512 = [ + 0x19, 0x5f, 0xa7, 0xdb, 0x18, 0xc6, 0xb9, 0x0e, 0xce, 0x4b, 0x4f, 0x35, 0x36, 0x79, 0x46, + 0x02, 0x7a, 0x45, 0x66, 0x63, 0x0e, 0xd9, 0x76, 0x93, 0x2b, 0x88, 0xe2, 0xbc, 0x0b, 0xd9, + 0x1f, 0x21, 0x51, 0x92, 0x00, 0x2e, 0xe3, 0xa2, 0xff, 0x24, 0xea, 0xef, 0x24, 0xd5, 0x24, + 0xf0, 0x46, 0xf3, 0x10, 0x32, 0xf4, 0xa6, 0x3b, 0x9d, 0xcd, 0xc5, 0x57, 0xf4, 0xc0, 0xe8, + 0x01, 0xe8, 0x1d, 0xb3, + ]; + + round_trip(block_size, file_crc32, &test_chunks, &sha512); +} + +#[test] +fn round_trip_partial_image() { + let block_size = 8; + let file_crc32 = 0; + let test_chunks = [ + TestChunk { + chunk: Chunk { + bounds: ChunkBounds { start: 0, end: 1 }, + data: ChunkData::Hole, + }, + data: b"", + }, + TestChunk { + chunk: Chunk { + bounds: ChunkBounds { start: 1, end: 2 }, + data: ChunkData::Data, + }, + data: b"\x00\x01\x02\x03\x04\x05\x06\x07", + }, + 
TestChunk { + chunk: Chunk { + bounds: ChunkBounds { start: 2, end: 3 }, + data: ChunkData::Hole, + }, + data: b"", + }, + TestChunk { + chunk: Chunk { + bounds: ChunkBounds { start: 3, end: 4 }, + data: ChunkData::Data, + }, + data: b"\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + }, + TestChunk { + chunk: Chunk { + bounds: ChunkBounds { start: 4, end: 5 }, + data: ChunkData::Hole, + }, + data: b"", + }, + ]; + let sha512 = [ + 0xee, 0x07, 0xc5, 0x4d, 0x85, 0xee, 0x69, 0x91, 0x61, 0x07, 0x10, 0xed, 0xec, 0x13, 0x5e, + 0xfb, 0xc3, 0x7d, 0xcf, 0x1f, 0x2a, 0x13, 0xf0, 0xb6, 0x85, 0xb4, 0xee, 0xe9, 0xd7, 0xa1, + 0x12, 0x79, 0x14, 0x16, 0x30, 0x7a, 0x81, 0xf9, 0x4f, 0x72, 0xb2, 0xdd, 0x33, 0xbe, 0x5d, + 0x55, 0x70, 0xa9, 0xe3, 0x94, 0x29, 0x40, 0x29, 0x8f, 0x35, 0x23, 0xf8, 0x78, 0x7f, 0xfe, + 0xd6, 0x4b, 0x60, 0x16, + ]; + + round_trip(block_size, file_crc32, &test_chunks, &sha512); +} diff --git a/deny.toml b/deny.toml index 4737d76..2c720f8 100644 --- a/deny.toml +++ b/deny.toml @@ -33,6 +33,7 @@ allow = [ "Apache-2.0", "Apache-2.0 WITH LLVM-exception", "BSD-3-Clause", + "CC0-1.0", "GPL-3.0", "ISC", "MIT", diff --git a/fuzz/src/bin/sparse.rs b/fuzz/src/bin/sparse.rs new file mode 100644 index 0000000..0d77fd4 --- /dev/null +++ b/fuzz/src/bin/sparse.rs @@ -0,0 +1,27 @@ +#[cfg(not(windows))] +mod fuzz { + use std::io::{self, Cursor}; + + use avbroot::format::sparse::{ChunkData, CrcMode, SparseReader}; + use honggfuzz::fuzz; + + pub fn main() { + loop { + fuzz!(|data: &[u8]| { + let reader = Cursor::new(data); + if let Ok(mut sparse_reader) = SparseReader::new(reader, CrcMode::Ignore) { + while let Ok(Some(chunk)) = sparse_reader.next_chunk() { + if chunk.data == ChunkData::Data { + let _ = io::copy(&mut sparse_reader, &mut io::sink()); + } + } + } + }); + } + } +} + +fn main() { + #[cfg(not(windows))] + fuzz::main(); +}