Skip to content

Commit

Permalink
Add format spec. for LSMT/ZFile
Browse files Browse the repository at this point in the history
Co-authored-by: Huiba Li <[email protected]>

Signed-off-by: Yifan Yuan <[email protected]>
  • Loading branch information
BigVan committed Jul 10, 2023
1 parent 55bdda6 commit 0d1d02d
Show file tree
Hide file tree
Showing 9 changed files with 87 additions and 29 deletions.
3 changes: 1 addition & 2 deletions src/overlaybd/lsmt/file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,7 @@ struct HeaderTrailer {

UUID::String uuid; // 37 bytes.
UUID::String parent_uuid; // 37 bytes.
uint8_t from; // DEPRECATED
uint8_t to; // DEPRECATED
uint16_t reserved; // Reserved.

static const uint8_t LSMT_V1 = 1; // v1 (UUID check)
static const uint8_t LSMT_SUB_V1 = 1; // .1 deprecated level range.
Expand Down
Empty file.
6 changes: 3 additions & 3 deletions src/overlaybd/zfile/compressor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ class LZ4Compressor : public BaseCompressor {
LOG_ERROR_RETURN(EINVAL, -1, "BaseCompressor init failed");
}
auto opt = &args->opt;
if (opt->type != CompressOptions::LZ4) {
if (opt->algo != CompressOptions::LZ4) {
LOG_ERROR_RETURN(EINVAL, -1,
"Compression type invalid. (expected: CompressionOptions::LZ4)");
}
Expand Down Expand Up @@ -268,7 +268,7 @@ class Compressor_zstd : public BaseCompressor {
LOG_ERROR_RETURN(EINVAL, -1, "BaseCompressor init failed");
}
const CompressOptions *opt = &args->opt;
if (opt->type != CompressOptions::ZSTD) {
if (opt->algo != CompressOptions::ZSTD) {
LOG_ERROR_RETURN(EINVAL, -1,
"Compression type invalid.(expected: CompressionOptions::ZSTD)");
}
Expand Down Expand Up @@ -345,7 +345,7 @@ ICompressor *create_compressor(const CompressArgs *args) {
ICompressor *rst = nullptr;
int init_flg = 0;
const CompressOptions &opt = args->opt;
switch (opt.type) {
switch (opt.algo) {

case CompressOptions::LZ4:
rst = new LZ4Compressor;
Expand Down
12 changes: 6 additions & 6 deletions src/overlaybd/zfile/compressor.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,18 +39,18 @@ class CompressOptions {
const static uint32_t DEFAULT_BLOCK_SIZE = 4096; // 8192;//32768;

uint32_t block_size = DEFAULT_BLOCK_SIZE;
uint8_t type = LZ4; // algorithm
uint8_t algo = LZ4; // algorithm
uint8_t level = 0; // compress level
uint8_t use_dict = 0;
uint8_t __padding_0 = 0;
uint32_t args = 0; // reserve;
uint32_t reserved = 0;
uint32_t dict_size = 0;
uint8_t verify = 0;
uint8_t __padding_1[7] = {0};

CompressOptions(uint8_t type = LZ4, uint32_t block_size = DEFAULT_BLOCK_SIZE,
uint8_t verify = 0)
: block_size(block_size), type(type), verify(verify) {
: block_size(block_size), algo(type), verify(verify) {
}
};
static_assert(sizeof(CompressOptions) == 24, "sizeof(CompressOptions) != 24");
Expand Down Expand Up @@ -79,12 +79,12 @@ class ICompressor {
*/
virtual int compress(const unsigned char *src, size_t src_len, unsigned char *dst,
size_t dst_len) = 0;
/*
/*
return the number of batches in QAT compressing...
*/
virtual int nbatch() = 0;
virtual int compress_batch(const unsigned char *src, size_t *src_chunk_len, unsigned char *dst,
size_t dst_buffer_capacity, size_t *dst_chunk_len /* save result chunk length */,
size_t dst_buffer_capacity, size_t *dst_chunk_len /* save result chunk length */,
size_t nchunk) = 0;
/*
return decompressed buffer size.
Expand All @@ -94,7 +94,7 @@ class ICompressor {
size_t dst_len) = 0;

virtual int decompress_batch(const unsigned char *src, size_t *src_chunk_len, unsigned char *dst,
size_t dst_buffer_capacity, size_t *dst_chunk_len /* save result chunk length */,
size_t dst_buffer_capacity, size_t *dst_chunk_len /* save result chunk length */,
size_t nchunk) = 0;
};

Expand Down
59 changes: 59 additions & 0 deletions src/overlaybd/zfile/format_spec.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# ZFile format
## Overview
ZFile is a generic compression format that realizes random read of the compressed
file and online decompression, providing the users with an illusion of reading the
original file. ZFile is not tied to overlaybd. Instead, it works with an arbitrary
underlay file.


| Section | Size (bytes) | Description |
| :---: | :----: | :--- |
| header | 512 | file header |
| data | variable | compressed blocks of the original file |
| dict | variable | optional dictionary to assist decompression |
| index | variable | a jump table that stores the size of each compressed block, which can be easily transformed into the offset of the block at runtime |
| trailer | 512 | file trailer (similar to header) |

## header
The format of the header is described below. All fields are little-endian.

| Field | Offset (bytes) | Size (bytes) | Description |
| :---: | :----: | :----: | :--- |
| magic0 | 0 | 8 | "ZFile\0\1" (and an implicit '\0') |
| magic1 | 8 | 16 | 74 75 6A 69, 2E 79 79 66, 40 41 6C 69, 62 61 62 61 |
| size | 24 | uint32_t | size of the header struct (108), excluding the tail padding |
| reserved| 28 | 4 | reserved space, should be 0 |
| flags | 32 | uint64_t | bits for flags* (see later for details) |
| index_offset | 40 | uint64_t | index offset |
| index_size | 48 | uint64_t | size of the index section, possibly compressed|
| original_file_size | 56 | uint64_t | size of the original file before compression |
| reserved| 64 | 8 | reserved space, should be 0 |
| block_size | 72 | uint32_t | size of each compression block |
| algo | 76 | uint8_t | compression algorithm |
| level | 77 | uint8_t | compression level |
| use_dict| 78 | bool | whether a dictionary is used |
| reserved| 79 | 5 | reserved space, should be 0 |
| dict_size | 84 | uint32_t | size of the dictionary section, 0 for non-existence |
| verify | 88 | bool | whether there exists a 4-byte CRC32 checksum following each compressed block |
| reserved| 89 | 7 | reserved space, should be 0 |
| reserved| 96 | 416 | reserved space for future use (offset 96 ~ 511), should be 0 |

***flags:**
| Field | Offset (bits) | Description |
| :---: | :----: | :--- |
| is_header | 0 | header (1) or trailer (0) |
| type | 1 | this is a data file (1) or index file (0) |
| sealed | 2 | this file is sealed (1) or not (0) |
| info_valid | 3 | information validity of the fields *after* flags (they were initially invalid (0) after creation; and readers must resort to trailer when they meet such headers) |
| reserved | 4~63 | reserved for future use; must be 0s |


## index
The index section is a table of (uint32_t) compressed size of each data block.
The whole section may be compressed with the same compression algorithm and
level.

## trailer
An updated edition of the header, in the same format. Trailer is useful in
append-only storage during creation of the blob. Use trailer whenever
possible.
4 changes: 2 additions & 2 deletions src/overlaybd/zfile/test/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ TEST_F(ZFileTest, verify_compression) {
LOG_ERROR("err: `(`)", errno, strerror(errno));
}
CompressOptions opt;
opt.type = algorithm;
opt.algo = algorithm;
opt.verify = enable_crc;
opt.block_size = 1<<bs;
CompressArgs args(opt);
Expand Down Expand Up @@ -216,7 +216,7 @@ TEST_F(ZFileTest, validation_check) {
LOG_ERROR("err: `(`)", errno, strerror(errno));
}
CompressOptions opt;
opt.type = CompressOptions::LZ4;
opt.algo = CompressOptions::LZ4;
opt.verify = 1;
CompressArgs args(opt);
int ret = zfile_compress(fsrc.get(), fdst.get(), &args);
Expand Down
24 changes: 12 additions & 12 deletions src/overlaybd/zfile/zfile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ class CompressionFile : public VirtualReadOnlyFile {
// offset 40, 48, 56, 64
uint64_t index_offset; // in bytes
uint64_t index_size; // # of SegmentMappings
uint64_t raw_data_size;
uint64_t original_file_size;
uint64_t reserved_0;
// offset 72
CompressOptions opt;
Expand Down Expand Up @@ -234,7 +234,7 @@ class CompressionFile : public VirtualReadOnlyFile {
auto ret = m_file->fstat(buf);
if (ret != 0)
return ret;
buf->st_size = m_ht.raw_data_size;
buf->st_size = m_ht.original_file_size;
return ret;
}

Expand Down Expand Up @@ -422,16 +422,16 @@ class CompressionFile : public VirtualReadOnlyFile {
LOG_ERROR_RETURN(ENOMEM, -1, "block_size: ` > MAX_READ_SIZE (`)", m_ht.opt.block_size,
MAX_READ_SIZE);
}
if (offset + count > m_ht.raw_data_size) {
if (offset + count > m_ht.original_file_size) {
LOG_WARN("the read range exceeds raw_file_size.(`>`)", count + offset,
m_ht.raw_data_size);
count = m_ht.raw_data_size - offset;
m_ht.original_file_size);
count = m_ht.original_file_size - offset;
}
if (count <= 0)
return 0;
if (offset + count > m_ht.raw_data_size) {
if (offset + count > m_ht.original_file_size) {
LOG_ERRNO_RETURN(ERANGE, -1, "pread range exceed (` > `)",
offset + count, m_ht.raw_data_size);
offset + count, m_ht.original_file_size);
}
ssize_t readn = 0; // final will equal to count
unsigned char raw[MAX_READ_SIZE];
Expand Down Expand Up @@ -522,7 +522,7 @@ class ZFileBuilder : public VirtualReadOnlyFile {
: m_dest(file), m_opt(args->opt), m_ownership(ownership) {

LOG_INFO("create stream compressing object. [ block size: `, type: `, enable_checksum: `]",
m_opt.block_size, m_opt.type, m_opt.verify);
m_opt.block_size, m_opt.algo, m_opt.verify);
}

int init(const CompressArgs *args) {
Expand Down Expand Up @@ -577,7 +577,7 @@ class ZFileBuilder : public VirtualReadOnlyFile {
auto pht = (CompressionFile::HeaderTrailer *)m_ht;
pht->index_offset = index_offset;
pht->index_size = index_size;
pht->raw_data_size = raw_data_size;
pht->original_file_size = raw_data_size;
LOG_INFO("write trailer.");
auto ret = write_header_trailer(m_dest, false, true, true, pht);
if (ret < 0)
Expand Down Expand Up @@ -748,7 +748,7 @@ IFile *zfile_open_ro(IFile *file, bool verify, bool ownership) {
zfile->m_jump_table = std::move(jump_table);
CompressArgs args(ht.opt);
ht.opt.verify = ht.opt.verify && verify;
LOG_DEBUG("compress type: `, bs: `, verify_checksum: `", ht.opt.type, ht.opt.block_size,
LOG_DEBUG("compress type: `, bs: `, verify_checksum: `", ht.opt.algo, ht.opt.block_size,
ht.opt.verify);

zfile->m_compressor.reset(create_compressor(&args));
Expand Down Expand Up @@ -792,7 +792,7 @@ int zfile_compress(IFile *file, IFile *as, const CompressArgs *args) {
}
CompressOptions opt = args->opt;
LOG_INFO("create compress file. [ block size: `, type: `, enable_checksum: `]", opt.block_size,
opt.type, opt.verify);
opt.algo, opt.verify);
auto compressor = create_compressor(args);
DEFER(delete compressor);
if (compressor == nullptr)
Expand Down Expand Up @@ -874,7 +874,7 @@ int zfile_compress(IFile *file, IFile *as, const CompressArgs *args) {
}
pht->index_offset = index_offset;
pht->index_size = index_size;
pht->raw_data_size = raw_data_size;
pht->original_file_size = raw_data_size;
LOG_INFO("write trailer.");
ret = write_header_trailer(as, false, true, true, pht);
if (ret < 0)
Expand Down
4 changes: 2 additions & 2 deletions src/tools/overlaybd-commit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,9 @@ int main(int argc, char **argv) {
algorithm = "lz4";
}
if (algorithm == "lz4") {
opt.type = ZFile::CompressOptions::LZ4;
opt.algo = ZFile::CompressOptions::LZ4;
} else if (algorithm == "zstd") {
opt.type = ZFile::CompressOptions::ZSTD;
opt.algo = ZFile::CompressOptions::ZSTD;
} else {
fprintf(stderr, "invalid '--algorithm' parameters.\n");
exit(-1);
Expand Down
4 changes: 2 additions & 2 deletions src/tools/overlaybd-zfile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,9 @@ int main(int argc, char **argv) {
CompressOptions opt;
opt.verify = 1;
if (algorithm == "lz4") {
opt.type = CompressOptions::LZ4;
opt.algo = CompressOptions::LZ4;
} else if (algorithm == "zstd") {
opt.type = CompressOptions::ZSTD;
opt.algo = CompressOptions::ZSTD;
}
opt.block_size = block_size * 1024;
if ((opt.block_size & (opt.block_size - 1)) != 0 || (block_size > 64 || block_size < 4)) {
Expand Down

0 comments on commit 0d1d02d

Please sign in to comment.