Skip to content

Commit

Permalink
bpf, arm64: Optimize BPF store/load using str/ldr with immediate offset
Browse files Browse the repository at this point in the history
The current BPF store/load instruction is translated by the JIT into two
instructions. The first instruction moves the immediate offset into a
temporary register. The second instruction uses this temporary register
to do the real store/load.

In fact, arm64 supports addressing with immediate offsets. So This patch
introduces optimization that uses arm64 str/ldr instruction with immediate
offset when the offset fits.

Example of generated instuction for r2 = *(u64 *)(r1 + 0):

without optimization:
mov x10, 0
ldr x1, [x0, x10]

with optimization:
ldr x1, [x0, 0]

If the offset is negative, or is not aligned correctly, or exceeds max
value, rollback to the use of temporary register.

Signed-off-by: Xu Kuohai <[email protected]>
  • Loading branch information
Xu Kuohai authored and Nobody committed Mar 11, 2022
1 parent dcbceeb commit a9b15ce
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 53 deletions.
9 changes: 9 additions & 0 deletions arch/arm64/include/asm/insn.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,8 @@ enum aarch64_insn_size_type {
enum aarch64_insn_ldst_type {
AARCH64_INSN_LDST_LOAD_REG_OFFSET,
AARCH64_INSN_LDST_STORE_REG_OFFSET,
AARCH64_INSN_LDST_LOAD_IMM_OFFSET,
AARCH64_INSN_LDST_STORE_IMM_OFFSET,
AARCH64_INSN_LDST_LOAD_PAIR_PRE_INDEX,
AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX,
AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX,
Expand Down Expand Up @@ -334,13 +336,15 @@ __AARCH64_INSN_FUNCS(load_pre, 0x3FE00C00, 0x38400C00)
__AARCH64_INSN_FUNCS(store_post, 0x3FE00C00, 0x38000400)
__AARCH64_INSN_FUNCS(load_post, 0x3FE00C00, 0x38400400)
__AARCH64_INSN_FUNCS(str_reg, 0x3FE0EC00, 0x38206800)
__AARCH64_INSN_FUNCS(str_imm, 0x3FC00000, 0x39000000)
__AARCH64_INSN_FUNCS(ldadd, 0x3F20FC00, 0x38200000)
__AARCH64_INSN_FUNCS(ldclr, 0x3F20FC00, 0x38201000)
__AARCH64_INSN_FUNCS(ldeor, 0x3F20FC00, 0x38202000)
__AARCH64_INSN_FUNCS(ldset, 0x3F20FC00, 0x38203000)
__AARCH64_INSN_FUNCS(swp, 0x3F20FC00, 0x38208000)
__AARCH64_INSN_FUNCS(cas, 0x3FA07C00, 0x08A07C00)
__AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800)
__AARCH64_INSN_FUNCS(ldr_imm, 0x3FC00000, 0x39400000)
__AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000)
__AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000)
__AARCH64_INSN_FUNCS(exclusive, 0x3F800000, 0x08000000)
Expand Down Expand Up @@ -500,6 +504,11 @@ u32 aarch64_insn_gen_load_store_reg(enum aarch64_insn_register reg,
enum aarch64_insn_register offset,
enum aarch64_insn_size_type size,
enum aarch64_insn_ldst_type type);
u32 aarch64_insn_gen_load_store_imm(enum aarch64_insn_register reg,
enum aarch64_insn_register base,
unsigned int imm,
enum aarch64_insn_size_type size,
enum aarch64_insn_ldst_type type);
u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1,
enum aarch64_insn_register reg2,
enum aarch64_insn_register base,
Expand Down
67 changes: 53 additions & 14 deletions arch/arm64/lib/insn.c
Original file line number Diff line number Diff line change
Expand Up @@ -299,29 +299,24 @@ static u32 aarch64_insn_encode_register(enum aarch64_insn_register_type type,
return insn;
}

static const u32 aarch64_insn_ldst_size[] = {
[AARCH64_INSN_SIZE_8] = 0,
[AARCH64_INSN_SIZE_16] = 1,
[AARCH64_INSN_SIZE_32] = 2,
[AARCH64_INSN_SIZE_64] = 3,
};

static u32 aarch64_insn_encode_ldst_size(enum aarch64_insn_size_type type,
u32 insn)
{
u32 size;

switch (type) {
case AARCH64_INSN_SIZE_8:
size = 0;
break;
case AARCH64_INSN_SIZE_16:
size = 1;
break;
case AARCH64_INSN_SIZE_32:
size = 2;
break;
case AARCH64_INSN_SIZE_64:
size = 3;
break;
default:
if (type < AARCH64_INSN_SIZE_8 || type > AARCH64_INSN_SIZE_64) {
pr_err("%s: unknown size encoding %d\n", __func__, type);
return AARCH64_BREAK_FAULT;
}

size = aarch64_insn_ldst_size[type];
insn &= ~GENMASK(31, 30);
insn |= size << 30;

Expand Down Expand Up @@ -504,6 +499,50 @@ u32 aarch64_insn_gen_load_store_reg(enum aarch64_insn_register reg,
offset);
}

u32 aarch64_insn_gen_load_store_imm(enum aarch64_insn_register reg,
enum aarch64_insn_register base,
unsigned int imm,
enum aarch64_insn_size_type size,
enum aarch64_insn_ldst_type type)
{
u32 insn;
u32 shift;

if (size < AARCH64_INSN_SIZE_8 || size > AARCH64_INSN_SIZE_64) {
pr_err("%s: unknown size encoding %d\n", __func__, type);
return AARCH64_BREAK_FAULT;
}

shift = aarch64_insn_ldst_size[size];
if (imm & ~(BIT(12 + shift) - BIT(shift))) {
pr_err("%s: invalid imm: %d\n", __func__, imm);
return AARCH64_BREAK_FAULT;
}

imm >>= shift;

switch (type) {
case AARCH64_INSN_LDST_LOAD_IMM_OFFSET:
insn = aarch64_insn_get_ldr_imm_value();
break;
case AARCH64_INSN_LDST_STORE_IMM_OFFSET:
insn = aarch64_insn_get_str_imm_value();
break;
default:
pr_err("%s: unknown load/store encoding %d\n", __func__, type);
return AARCH64_BREAK_FAULT;
}

insn = aarch64_insn_encode_ldst_size(size, insn);

insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn, reg);

insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
base);

return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_12, insn, imm);
}

u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1,
enum aarch64_insn_register reg2,
enum aarch64_insn_register base,
Expand Down
14 changes: 14 additions & 0 deletions arch/arm64/net/bpf_jit.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,20 @@
#define A64_STR64(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 64, STORE)
#define A64_LDR64(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 64, LOAD)

/* Load/store register (immediate offset) */
#define A64_LS_IMM(Rt, Rn, imm, size, type) \
aarch64_insn_gen_load_store_imm(Rt, Rn, imm, \
AARCH64_INSN_SIZE_##size, \
AARCH64_INSN_LDST_##type##_IMM_OFFSET)
#define A64_STRBI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 8, STORE)
#define A64_LDRBI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 8, LOAD)
#define A64_STRHI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 16, STORE)
#define A64_LDRHI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 16, LOAD)
#define A64_STR32I(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 32, STORE)
#define A64_LDR32I(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 32, LOAD)
#define A64_STR64I(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 64, STORE)
#define A64_LDR64I(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 64, LOAD)

/* Load/store register pair */
#define A64_LS_PAIR(Rt, Rt2, Rn, offset, ls, type) \
aarch64_insn_gen_load_store_pair(Rt, Rt2, Rn, offset, \
Expand Down
76 changes: 37 additions & 39 deletions arch/arm64/net/bpf_jit_comp.c
Original file line number Diff line number Diff line change
Expand Up @@ -971,20 +971,22 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
case BPF_LDX | BPF_PROBE_MEM | BPF_W:
case BPF_LDX | BPF_PROBE_MEM | BPF_H:
case BPF_LDX | BPF_PROBE_MEM | BPF_B:
emit_a64_mov_i(1, tmp, off, ctx);

#define BUILD_LDX_INSN(a, b, c) \
case BPF_##a: \
if ((off & ((1 << c) - 1)) == 0 && off >= 0 && off <= (0xFFF << c)) { \
emit(A64_LDR##b##I(dst, src, off), ctx); \
} else { \
emit_a64_mov_i(1, tmp, off, ctx); \
emit(A64_LDR##b(dst, src, tmp), ctx); \
} \
break;

switch (BPF_SIZE(code)) {
case BPF_W:
emit(A64_LDR32(dst, src, tmp), ctx);
break;
case BPF_H:
emit(A64_LDRH(dst, src, tmp), ctx);
break;
case BPF_B:
emit(A64_LDRB(dst, src, tmp), ctx);
break;
case BPF_DW:
emit(A64_LDR64(dst, src, tmp), ctx);
break;
BUILD_LDX_INSN(W, 32, 2)
BUILD_LDX_INSN(H, H, 1)
BUILD_LDX_INSN(B, B, 0)
BUILD_LDX_INSN(DW, 64, 3)
}

ret = add_exception_handler(insn, ctx, dst);
Expand All @@ -1010,22 +1012,25 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
case BPF_ST | BPF_MEM | BPF_H:
case BPF_ST | BPF_MEM | BPF_B:
case BPF_ST | BPF_MEM | BPF_DW:

#define __BUILD_STX_INSN(a, b, c, d, e) \
case BPF_##a: \
if ((off & ((1 << c) - 1)) == 0 && off >= 0 && off <= (0xFFF << c)) { \
emit(A64_STR##b##I(d, dst, off), ctx); \
} else { \
emit_a64_mov_i(1, e, off, ctx); \
emit(A64_STR##b(d, dst, e), ctx); \
} \
break;

#define BUILD_ST_INSN(a, b, c) __BUILD_STX_INSN(a, b, c, tmp, tmp2)
/* Load imm to a register then store it */
emit_a64_mov_i(1, tmp2, off, ctx);
emit_a64_mov_i(1, tmp, imm, ctx);
switch (BPF_SIZE(code)) {
case BPF_W:
emit(A64_STR32(tmp, dst, tmp2), ctx);
break;
case BPF_H:
emit(A64_STRH(tmp, dst, tmp2), ctx);
break;
case BPF_B:
emit(A64_STRB(tmp, dst, tmp2), ctx);
break;
case BPF_DW:
emit(A64_STR64(tmp, dst, tmp2), ctx);
break;
BUILD_ST_INSN(W, 32, 2)
BUILD_ST_INSN(H, H, 1)
BUILD_ST_INSN(B, B, 0)
BUILD_ST_INSN(DW, 64, 3)
}
break;

Expand All @@ -1034,20 +1039,13 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
case BPF_STX | BPF_MEM | BPF_H:
case BPF_STX | BPF_MEM | BPF_B:
case BPF_STX | BPF_MEM | BPF_DW:
emit_a64_mov_i(1, tmp, off, ctx);

#define BUILD_STX_INSN(a, b, c) __BUILD_STX_INSN(a, b, c, src, tmp)
switch (BPF_SIZE(code)) {
case BPF_W:
emit(A64_STR32(src, dst, tmp), ctx);
break;
case BPF_H:
emit(A64_STRH(src, dst, tmp), ctx);
break;
case BPF_B:
emit(A64_STRB(src, dst, tmp), ctx);
break;
case BPF_DW:
emit(A64_STR64(src, dst, tmp), ctx);
break;
BUILD_STX_INSN(W, 32, 2)
BUILD_STX_INSN(H, H, 1)
BUILD_STX_INSN(B, B, 0)
BUILD_STX_INSN(DW, 64, 3)
}
break;

Expand Down

0 comments on commit a9b15ce

Please sign in to comment.