Skip to content

Commit

Permalink
erofs: lzma compression support
Browse files Browse the repository at this point in the history
Add MicroLZMA support in order to maximize compression ratios for
specific scenarios. For example, it's useful for low-end embedded
boards and as a secondary algorithm in a file for specific access
patterns.

MicroLZMA is a new container format for raw LZMA1, which was created
by Lasse Collin aiming to minimize old LZMA headers and get rid of
unnecessary EOPM (end of payload marker) as well as to enable
fixed-sized output compression, especially for 4KiB pclusters.

Similar to LZ4, inplace I/O approach is used to minimize runtime
memory footprint when dealing with I/O. Overlapped decompression is
handled with 1) bounced buffer for data under processing or 2) extra
short-lived pages from the on-stack pagepool which will be shared in
the same read request (128KiB for example).

Link: https://lore.kernel.org/r/[email protected]
Acked-by: Chao Yu <[email protected]>
Signed-off-by: Gao Xiang <[email protected]>
  • Loading branch information
hsiangkao committed Oct 19, 2021
1 parent 966edfb commit 622cead
Show file tree
Hide file tree
Showing 11 changed files with 383 additions and 21 deletions.
16 changes: 16 additions & 0 deletions fs/erofs/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,19 @@ config EROFS_FS_ZIP
Enable fixed-sized output compression for EROFS.

If you don't want to enable compression feature, say N.

config EROFS_FS_ZIP_LZMA
bool "EROFS LZMA compressed data support"
depends on EROFS_FS_ZIP
select XZ_DEC
select XZ_DEC_MICROLZMA
help
Saying Y here includes support for reading EROFS file systems
containing LZMA compressed data, specifically called microLZMA. it
gives better compression ratios than the LZ4 algorithm, at the
expense of more CPU overhead.

LZMA support is an experimental feature for now and so most file
systems will be readable without selecting this option.

If unsure, say N.
1 change: 1 addition & 0 deletions fs/erofs/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ obj-$(CONFIG_EROFS_FS) += erofs.o
erofs-objs := super.o inode.o data.o namei.o dir.o utils.o pcpubuf.o
erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o
erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o
16 changes: 16 additions & 0 deletions fs/erofs/compress.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ struct z_erofs_decompress_req {
bool inplace_io, partial_decoding;
};

struct z_erofs_decompressor {
int (*decompress)(struct z_erofs_decompress_req *rq,
struct list_head *pagepool);
char *name;
};

/* some special page->private (unsigned long, see below) */
#define Z_EROFS_SHORTLIVED_PAGE (-1UL << 2)
#define Z_EROFS_PREALLOCATED_PAGE (-2UL << 2)
Expand Down Expand Up @@ -75,7 +81,17 @@ static inline bool z_erofs_put_shortlivedpage(struct list_head *pagepool,
return true;
}

#define MNGD_MAPPING(sbi) ((sbi)->managed_cache->i_mapping)
static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi,
struct page *page)
{
return page->mapping == MNGD_MAPPING(sbi);
}

int z_erofs_decompress(struct z_erofs_decompress_req *rq,
struct list_head *pagepool);

/* prototypes for specific algorithms */
int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
struct list_head *pagepool);
#endif
12 changes: 6 additions & 6 deletions fs/erofs/decompressor.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,6 @@
#define LZ4_DECOMPRESS_INPLACE_MARGIN(srcsize) (((srcsize) >> 8) + 32)
#endif

struct z_erofs_decompressor {
int (*decompress)(struct z_erofs_decompress_req *rq,
struct list_head *pagepool);
char *name;
};

int z_erofs_load_lz4_config(struct super_block *sb,
struct erofs_super_block *dsb,
struct z_erofs_lz4_cfgs *lz4, int size)
Expand Down Expand Up @@ -349,6 +343,12 @@ static struct z_erofs_decompressor decompressors[] = {
.decompress = z_erofs_lz4_decompress,
.name = "lz4"
},
#ifdef CONFIG_EROFS_FS_ZIP_LZMA
[Z_EROFS_COMPRESSION_LZMA] = {
.decompress = z_erofs_lzma_decompress,
.name = "lzma"
},
#endif
};

int z_erofs_decompress(struct z_erofs_decompress_req *rq,
Expand Down
290 changes: 290 additions & 0 deletions fs/erofs/decompressor_lzma.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,290 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <linux/xz.h>
#include <linux/module.h>
#include "compress.h"

struct z_erofs_lzma {
struct z_erofs_lzma *next;
struct xz_dec_microlzma *state;
struct xz_buf buf;
u8 bounce[PAGE_SIZE];
};

/* considering the LZMA performance, no need to use a lockless list for now */
static DEFINE_SPINLOCK(z_erofs_lzma_lock);
static unsigned int z_erofs_lzma_max_dictsize;
static unsigned int z_erofs_lzma_nstrms, z_erofs_lzma_avail_strms;
static struct z_erofs_lzma *z_erofs_lzma_head;
static DECLARE_WAIT_QUEUE_HEAD(z_erofs_lzma_wq);

module_param_named(lzma_streams, z_erofs_lzma_nstrms, uint, 0444);

void z_erofs_lzma_exit(void)
{
/* there should be no running fs instance */
while (z_erofs_lzma_avail_strms) {
struct z_erofs_lzma *strm;

spin_lock(&z_erofs_lzma_lock);
strm = z_erofs_lzma_head;
if (!strm) {
spin_unlock(&z_erofs_lzma_lock);
DBG_BUGON(1);
return;
}
z_erofs_lzma_head = NULL;
spin_unlock(&z_erofs_lzma_lock);

while (strm) {
struct z_erofs_lzma *n = strm->next;

if (strm->state)
xz_dec_microlzma_end(strm->state);
kfree(strm);
--z_erofs_lzma_avail_strms;
strm = n;
}
}
}

int z_erofs_lzma_init(void)
{
unsigned int i;

/* by default, use # of possible CPUs instead */
if (!z_erofs_lzma_nstrms)
z_erofs_lzma_nstrms = num_possible_cpus();

for (i = 0; i < z_erofs_lzma_nstrms; ++i) {
struct z_erofs_lzma *strm = kzalloc(sizeof(*strm), GFP_KERNEL);

if (!strm) {
z_erofs_lzma_exit();
return -ENOMEM;
}
spin_lock(&z_erofs_lzma_lock);
strm->next = z_erofs_lzma_head;
z_erofs_lzma_head = strm;
spin_unlock(&z_erofs_lzma_lock);
++z_erofs_lzma_avail_strms;
}
return 0;
}

int z_erofs_load_lzma_config(struct super_block *sb,
struct erofs_super_block *dsb,
struct z_erofs_lzma_cfgs *lzma, int size)
{
static DEFINE_MUTEX(lzma_resize_mutex);
unsigned int dict_size, i;
struct z_erofs_lzma *strm, *head = NULL;
int err;

if (!lzma || size < sizeof(struct z_erofs_lzma_cfgs)) {
erofs_err(sb, "invalid lzma cfgs, size=%u", size);
return -EINVAL;
}
if (lzma->format) {
erofs_err(sb, "unidentified lzma format %x, please check kernel version",
le16_to_cpu(lzma->format));
return -EINVAL;
}
dict_size = le32_to_cpu(lzma->dict_size);
if (dict_size > Z_EROFS_LZMA_MAX_DICT_SIZE || dict_size < 4096) {
erofs_err(sb, "unsupported lzma dictionary size %u",
dict_size);
return -EINVAL;
}

erofs_info(sb, "EXPERIMENTAL MicroLZMA in use. Use at your own risk!");

/* in case 2 z_erofs_load_lzma_config() race to avoid deadlock */
mutex_lock(&lzma_resize_mutex);

if (z_erofs_lzma_max_dictsize >= dict_size) {
mutex_unlock(&lzma_resize_mutex);
return 0;
}

/* 1. collect/isolate all streams for the following check */
for (i = 0; i < z_erofs_lzma_avail_strms; ++i) {
struct z_erofs_lzma *last;

again:
spin_lock(&z_erofs_lzma_lock);
strm = z_erofs_lzma_head;
if (!strm) {
spin_unlock(&z_erofs_lzma_lock);
wait_event(z_erofs_lzma_wq,
READ_ONCE(z_erofs_lzma_head));
goto again;
}
z_erofs_lzma_head = NULL;
spin_unlock(&z_erofs_lzma_lock);

for (last = strm; last->next; last = last->next)
++i;
last->next = head;
head = strm;
}

err = 0;
/* 2. walk each isolated stream and grow max dict_size if needed */
for (strm = head; strm; strm = strm->next) {
if (strm->state)
xz_dec_microlzma_end(strm->state);
strm->state = xz_dec_microlzma_alloc(XZ_PREALLOC, dict_size);
if (!strm->state)
err = -ENOMEM;
}

/* 3. push back all to the global list and update max dict_size */
spin_lock(&z_erofs_lzma_lock);
DBG_BUGON(z_erofs_lzma_head);
z_erofs_lzma_head = head;
spin_unlock(&z_erofs_lzma_lock);

z_erofs_lzma_max_dictsize = dict_size;
mutex_unlock(&lzma_resize_mutex);
return err;
}

int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
struct list_head *pagepool)
{
const unsigned int nrpages_out =
PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
const unsigned int nrpages_in =
PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT;
unsigned int inputmargin, inlen, outlen, pageofs;
struct z_erofs_lzma *strm;
u8 *kin;
bool bounced = false;
int no, ni, j, err = 0;

/* 1. get the exact LZMA compressed size */
kin = kmap(*rq->in);
inputmargin = 0;
while (!kin[inputmargin & ~PAGE_MASK])
if (!(++inputmargin & ~PAGE_MASK))
break;

if (inputmargin >= PAGE_SIZE) {
kunmap(*rq->in);
return -EFSCORRUPTED;
}
rq->inputsize -= inputmargin;

/* 2. get an available lzma context */
again:
spin_lock(&z_erofs_lzma_lock);
strm = z_erofs_lzma_head;
if (!strm) {
spin_unlock(&z_erofs_lzma_lock);
wait_event(z_erofs_lzma_wq, READ_ONCE(z_erofs_lzma_head));
goto again;
}
z_erofs_lzma_head = strm->next;
spin_unlock(&z_erofs_lzma_lock);

/* 3. multi-call decompress */
inlen = rq->inputsize;
outlen = rq->outputsize;
xz_dec_microlzma_reset(strm->state, inlen, outlen,
!rq->partial_decoding);
pageofs = rq->pageofs_out;
strm->buf.in = kin + inputmargin;
strm->buf.in_pos = 0;
strm->buf.in_size = min_t(u32, inlen, PAGE_SIZE - inputmargin);
inlen -= strm->buf.in_size;
strm->buf.out = NULL;
strm->buf.out_pos = 0;
strm->buf.out_size = 0;

for (ni = 0, no = -1;;) {
enum xz_ret xz_err;

if (strm->buf.out_pos == strm->buf.out_size) {
if (strm->buf.out) {
kunmap(rq->out[no]);
strm->buf.out = NULL;
}

if (++no >= nrpages_out || !outlen) {
erofs_err(rq->sb, "decompressed buf out of bound");
err = -EFSCORRUPTED;
break;
}
strm->buf.out_pos = 0;
strm->buf.out_size = min_t(u32, outlen,
PAGE_SIZE - pageofs);
outlen -= strm->buf.out_size;
if (rq->out[no])
strm->buf.out = kmap(rq->out[no]) + pageofs;
pageofs = 0;
} else if (strm->buf.in_pos == strm->buf.in_size) {
kunmap(rq->in[ni]);

if (++ni >= nrpages_in || !inlen) {
erofs_err(rq->sb, "compressed buf out of bound");
err = -EFSCORRUPTED;
break;
}
strm->buf.in_pos = 0;
strm->buf.in_size = min_t(u32, inlen, PAGE_SIZE);
inlen -= strm->buf.in_size;
kin = kmap(rq->in[ni]);
strm->buf.in = kin;
bounced = false;
}

/*
* Handle overlapping: Use bounced buffer if the compressed
* data is under processing; Otherwise, Use short-lived pages
* from the on-stack pagepool where pages share with the same
* request.
*/
if (!bounced && rq->out[no] == rq->in[ni]) {
memcpy(strm->bounce, strm->buf.in, strm->buf.in_size);
strm->buf.in = strm->bounce;
bounced = true;
}
for (j = ni + 1; j < nrpages_in; ++j) {
struct page *tmppage;

if (rq->out[no] != rq->in[j])
continue;

DBG_BUGON(erofs_page_is_managed(EROFS_SB(rq->sb),
rq->in[j]));
tmppage = erofs_allocpage(pagepool,
GFP_KERNEL | __GFP_NOFAIL);
set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE);
copy_highpage(tmppage, rq->in[j]);
rq->in[j] = tmppage;
}
xz_err = xz_dec_microlzma_run(strm->state, &strm->buf);
DBG_BUGON(strm->buf.out_pos > strm->buf.out_size);
DBG_BUGON(strm->buf.in_pos > strm->buf.in_size);

if (xz_err != XZ_OK) {
if (xz_err == XZ_STREAM_END && !outlen)
break;
erofs_err(rq->sb, "failed to decompress %d in[%u] out[%u]",
xz_err, rq->inputsize, rq->outputsize);
err = -EFSCORRUPTED;
break;
}
}
if (no < nrpages_out && strm->buf.out)
kunmap(rq->in[no]);
if (ni < nrpages_in)
kunmap(rq->in[ni]);
/* 4. push back LZMA stream context to the global list */
spin_lock(&z_erofs_lzma_lock);
strm->next = z_erofs_lzma_head;
z_erofs_lzma_head = strm;
spin_unlock(&z_erofs_lzma_lock);
wake_up(&z_erofs_lzma_wq);
return err;
}
Loading

0 comments on commit 622cead

Please sign in to comment.