Skip to content

Commit

Permalink
Let targz type conversions support bgzip format
Browse files Browse the repository at this point in the history
code reference https://github.com/madler/zlib/blob/master/examples/zran.c

At present, neither zran nor the normal targz conversion takes bgzip
(multi-member gzip) input into account when decompressing, so problems
occur when such an image is encountered. This PR adds support for the bgzip format.

bgzip: http://www.htslib.org/doc/bgzip.html

Signed-off-by: zyfjeff <[email protected]>
  • Loading branch information
zyfjeff authored and zyfjeff committed Nov 25, 2023
1 parent af5d88b commit 68e6238
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 18 deletions.
3 changes: 2 additions & 1 deletion misc/top_images/image_list.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,5 @@ kong
solr
sentry
zookeeper
ghcr.io/dragonflyoss/image-service/pax-uid-test
ghcr.io/dragonflyoss/image-service/pax-uid-test
cgr.dev/chainguard/busybox
Binary file added tests/texture/zran/bgzip.tar.gz
Binary file not shown.
8 changes: 4 additions & 4 deletions utils/src/compress/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ pub fn decompress(src: &[u8], dst: &mut [u8], algorithm: Algorithm) -> Result<us
/// Stream decoder for gzip/lz4/zstd.
pub enum Decoder<'a, R: Read> {
None(R),
Gzip(flate2::bufread::GzDecoder<BufReader<R>>),
Gzip(flate2::bufread::MultiGzDecoder<BufReader<R>>),
Zstd(zstd::stream::Decoder<'a, BufReader<R>>),
}

Expand All @@ -152,7 +152,7 @@ impl<'a, R: Read> Decoder<'a, R> {
let decoder = match algorithm {
Algorithm::None => Decoder::None(reader),
Algorithm::GZip => {
Decoder::Gzip(flate2::bufread::GzDecoder::new(BufReader::new(reader)))
Decoder::Gzip(flate2::bufread::MultiGzDecoder::new(BufReader::new(reader)))
}
Algorithm::Lz4Block => panic!("Decoder doesn't support lz4_block"),
Algorithm::Zstd => Decoder::Zstd(zstd::stream::Decoder::new(reader)?),
Expand All @@ -173,14 +173,14 @@ impl<'a, R: Read> Read for Decoder<'a, R> {

/// Stream decoder for zlib/gzip.
pub struct ZlibDecoder<R> {
stream: flate2::bufread::GzDecoder<BufReader<R>>,
stream: flate2::bufread::MultiGzDecoder<BufReader<R>>,
}

impl<R: Read> ZlibDecoder<R> {
/// Create a new instance of `ZlibDecoder`.
pub fn new(reader: R) -> Self {
ZlibDecoder {
stream: flate2::bufread::GzDecoder::new(BufReader::new(reader)),
stream: flate2::bufread::MultiGzDecoder::new(BufReader::new(reader)),
}
}
}
Expand Down
127 changes: 114 additions & 13 deletions utils/src/compress/zlib_random.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use std::{mem, ptr};

use libz_sys::{
inflate, inflateEnd, inflateInit2_, inflatePrime, inflateReset, inflateSetDictionary, uInt,
z_stream, zlibVersion, Z_BLOCK, Z_BUF_ERROR, Z_OK, Z_STREAM_END,
z_stream, zlibVersion, Z_BLOCK, Z_BUF_ERROR, Z_OK, Z_STREAM_END, inflateReset2,
};
use sha2::{Digest, Sha256};

Expand Down Expand Up @@ -118,19 +118,57 @@ impl ZranDecoder {
self.stream.set_dict(dict)?;

self.stream.set_next_in(input);
self.stream.set_next_out(output);
self.stream.set_avail_out(ctx.out_len as uInt);
let ret = self.stream.inflate(true);
match ret {
Z_OK => {
let count = self.stream.next_out() as usize - output.as_ptr() as usize;
if count != ctx.out_len as usize {
Err(eio!("failed to decode data from stream, size mismatch"))
} else {
Ok(count)

let mut left = ctx.out_len;
loop {
let used = (ctx.out_len - left) as usize;
self.stream.set_next_out(&mut output[used..]);
self.stream.set_avail_out(left as uInt);
let mut got = self.stream.avail_out();
let mut ret = self.stream.raw_inflate(0);
got -= self.stream.avail_out();
left -= got;

match ret {
Z_OK => {
let count = self.stream.next_out() as usize - output.as_ptr() as usize;
if count != ctx.out_len as usize {
return Err(eio!("failed to decode data from stream, size mismatch"))
} else {
return Ok(count)
}
}
Z_STREAM_END => {
// Discard the gzip trailer.
let drop = 8;
if self.stream.avail_in() >= drop {
let avail_in = self.stream.avail_in();
let used = input.len() - avail_in as usize + drop as usize;
self.stream.set_next_in(&input[used..]);
} else {
// The input does not have a complete trailer.
return Err(eio!("the input does not have a complete gzip trailer"));
}
// Use inflate to skip the gzip header and resume the raw inflate there.
self.stream.reset2(true)?;
let mut discard = vec![0u8; ZRAN_DICT_WIN_SIZE as usize];
loop {
self.stream.set_next_out(&mut discard);
self.stream.set_avail_out(ZRAN_DICT_WIN_SIZE as u32);
ret = self.stream.raw_inflate(Z_BLOCK); // stop at end of header
if ret == Z_OK && (self.stream.data_type() & 0x80) ==0 {
continue
}

if ret != Z_OK {
return Err(eio!(format!("failed to handle gzip multi member, ret: {:?}", ret)))
}
self.stream.reset2(false)?;
break;
}
}
e => return Err(eio!(format!("failed to decode data from compressed data stream, ret: {}", e))),
}
_ => Err(eio!("failed to decode data from compressed data stream")),
}
}
}
Expand Down Expand Up @@ -531,12 +569,16 @@ impl ZranStream {
let mode = if decode { 0 } else { Z_BLOCK };
self.total_in += self.stream.avail_in as u64;
self.total_out += self.stream.avail_out as u64;
let ret = unsafe { inflate(self.stream.deref_mut() as *mut z_stream, mode) };
let ret = self.raw_inflate(mode);
self.total_in -= self.stream.avail_in as u64;
self.total_out -= self.stream.avail_out as u64;
ret
}

/// Invoke zlib's `inflate()` on the wrapped stream with the given flush `mode`
/// and hand back zlib's return code unchanged.
///
/// This helper itself performs no `total_in` / `total_out` bookkeeping.
fn raw_inflate(&mut self, mode: i32) -> i32 {
    let strm: *mut z_stream = self.stream.deref_mut();
    // SAFETY (assumed): `strm` points at the z_stream owned by `self`, which is
    // presumably initialized via `inflateInit2_` — confirm at the constructor.
    unsafe { inflate(strm, mode) }
}

fn reset(&mut self) -> Result<()> {
let ret = unsafe { inflateReset(self.stream.deref_mut() as *mut z_stream) };
if ret != Z_OK {
Expand All @@ -545,6 +587,15 @@ impl ZranStream {
Ok(())
}

/// Reset the inflate state and select the window bits used for the next data.
///
/// `is_gzip == true` passes window bits `31` to `inflateReset2` (gzip wrapper per
/// the zlib manual); `false` passes `-15` (raw deflate, no wrapper).
///
/// Returns an `einval!` error when zlib does not report `Z_OK`.
fn reset2(&mut self, is_gzip: bool) -> Result<()> {
    let window_bits = match is_gzip {
        true => 31,
        false => -15,
    };
    let strm: *mut z_stream = self.stream.deref_mut();
    // SAFETY (assumed): `strm` points at the z_stream owned by `self`, which is
    // presumably initialized via `inflateInit2_` — confirm at the constructor.
    let status = unsafe { inflateReset2(strm, window_bits) };
    if status == Z_OK {
        Ok(())
    } else {
        Err(einval!("failed to reset zlib inflate context"))
    }
}

fn get_compression_info(&mut self, buf: &[u8], stream_switched: u8) -> ZranCompInfo {
let previous_byte = if self.stream.data_type & 0x7 != 0 {
assert!(self.stream.next_in as usize >= buf.as_ptr() as usize);
Expand Down Expand Up @@ -618,6 +669,14 @@ impl ZranStream {
self.stream.avail_in
}

/// Return the current `avail_out` field of the underlying zlib stream.
///
/// Callers in this file diff the value before and after an inflate call to
/// learn how many output bytes were produced.
fn avail_out(&self) -> u32 {
self.stream.avail_out
}

/// Return the current `data_type` field of the underlying zlib stream.
///
/// Callers in this file test bit `0x80` after a `Z_BLOCK` inflate and the low
/// three bits (`0x7`) when collecting compression info; see the zlib manual
/// for the exact meaning of each bit.
fn data_type(&self) -> i32 {
self.stream.data_type
}

/// Overwrite the `avail_in` field of the underlying zlib stream with `avail_in`.
fn set_avail_in(&mut self, avail_in: u32) {
self.stream.avail_in = avail_in;
}
Expand Down Expand Up @@ -844,6 +903,48 @@ mod tests {
assert_eq!(ctx.len(), 3);
}

/// Walk every regular entry of the bgzip test archive through a `ZranGenerator`,
/// then check that each recorded compression context can be decoded on its own
/// from the bytes read back out of the file.
#[test]
fn test_zran_bgzip() {
    let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR");
    let archive_path = PathBuf::from(&manifest_dir).join("../tests/texture/zran/bgzip.tar.gz");
    let source = OpenOptions::new().read(true).open(&archive_path).unwrap();
    let zran_reader = ZranReader::new(source).unwrap();
    let mut archive = Archive::new(zran_reader.clone());
    archive.set_ignore_zeros(true);

    // Small size limits so that the generator records several contexts.
    let mut generator = ZranGenerator::new(zran_reader);
    generator.set_min_compressed_size(1024);
    generator.set_max_compressed_size(2048);
    generator.set_max_uncompressed_size(4096);

    for item in archive.entries().unwrap() {
        let mut item = item.unwrap();
        if item.header().entry_type() != EntryType::Regular {
            continue;
        }
        // Read the entry in 512-byte steps, bracketing every read with
        // begin_read/end_read; the final zero-length read is bracketed too.
        loop {
            let _chunk_start = generator.begin_read(512).unwrap();
            let mut chunk = vec![0u8; 512];
            let read_len = item.read(&mut chunk).unwrap();
            let _chunk_info = generator.end_read().unwrap();
            if read_len == 0 {
                break;
            }
        }
    }

    let contexts = generator.get_compression_ctx_array();
    for ctx in contexts.iter() {
        // Pull exactly the compressed bytes this context covers from the file.
        let mut compressed = vec![0u8; ctx.in_len as usize];
        let mut raw = OpenOptions::new().read(true).open(&archive_path).unwrap();
        raw.seek(SeekFrom::Start(ctx.in_offset)).unwrap();
        raw.read_exact(&mut compressed).unwrap();

        // A fresh decoder per context: each one must decode independently.
        let mut plain = vec![0u8; ctx.out_len as usize];
        let mut decoder = ZranDecoder::new().unwrap();
        decoder
            .uncompress(ctx, None, &compressed, &mut plain)
            .unwrap();
    }
}

#[test]
fn test_zran_decoder() {
let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR");
Expand Down

0 comments on commit 68e6238

Please sign in to comment.