diff options
author | Jie Meng <jmeng@fb.com> | 2021-10-01 20:56:26 -0700 |
---|---|---|
committer | Daniel Borkmann <daniel@iogearbox.net> | 2021-10-06 15:24:36 +0200 |
commit | 57a610f1c58fa493315e1c24eef6d992cdf4c4a9 (patch) | |
tree | b19776aea72a4c4a6bf2d48e8cebccd281375f6d | |
parent | 0640c77c46cb84328d5e08aa85a781a60be8b02b (diff) |
bpf, x64: Save bytes for DIV by reducing reg copies
Instead of unconditionally performing push/pop on %rax/%rdx in case of
division/modulo, we can save a few bytes in case of destination register
being either BPF r0 (%rax) or r3 (%rdx) since the result is written in
there anyway.
Also, we do not need to copy the source to %r11 unless the source is either
%rax, %rdx or an immediate.
For example, before the patch:
22: push %rax
23: push %rdx
24: mov %rsi,%r11
27: xor %edx,%edx
29: div %r11
2c: mov %rax,%r11
2f: pop %rdx
30: pop %rax
31: mov %r11,%rax
After:
22: push %rdx
23: xor %edx,%edx
25: div %rsi
28: pop %rdx
Signed-off-by: Jie Meng <jmeng@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20211002035626.2041910-1-jmeng@fb.com
-rw-r--r-- | arch/x86/net/bpf_jit_comp.c | 71 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/verifier/jit.c | 47 |
2 files changed, 89 insertions, 29 deletions
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 576ef1a6954a..5a0edea3cc2e 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1028,19 +1028,30 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_ALU64 | BPF_MOD | BPF_X: case BPF_ALU64 | BPF_DIV | BPF_X: case BPF_ALU64 | BPF_MOD | BPF_K: - case BPF_ALU64 | BPF_DIV | BPF_K: - EMIT1(0x50); /* push rax */ - EMIT1(0x52); /* push rdx */ - - if (BPF_SRC(insn->code) == BPF_X) - /* mov r11, src_reg */ - EMIT_mov(AUX_REG, src_reg); - else + case BPF_ALU64 | BPF_DIV | BPF_K: { + bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; + + if (dst_reg != BPF_REG_0) + EMIT1(0x50); /* push rax */ + if (dst_reg != BPF_REG_3) + EMIT1(0x52); /* push rdx */ + + if (BPF_SRC(insn->code) == BPF_X) { + if (src_reg == BPF_REG_0 || + src_reg == BPF_REG_3) { + /* mov r11, src_reg */ + EMIT_mov(AUX_REG, src_reg); + src_reg = AUX_REG; + } + } else { /* mov r11, imm32 */ EMIT3_off32(0x49, 0xC7, 0xC3, imm32); + src_reg = AUX_REG; + } - /* mov rax, dst_reg */ - EMIT_mov(BPF_REG_0, dst_reg); + if (dst_reg != BPF_REG_0) + /* mov rax, dst_reg */ + emit_mov_reg(&prog, is64, BPF_REG_0, dst_reg); /* * xor edx, edx @@ -1048,26 +1059,28 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, */ EMIT2(0x31, 0xd2); - if (BPF_CLASS(insn->code) == BPF_ALU64) - /* div r11 */ - EMIT3(0x49, 0xF7, 0xF3); - else - /* div r11d */ - EMIT3(0x41, 0xF7, 0xF3); - - if (BPF_OP(insn->code) == BPF_MOD) - /* mov r11, rdx */ - EMIT3(0x49, 0x89, 0xD3); - else - /* mov r11, rax */ - EMIT3(0x49, 0x89, 0xC3); - - EMIT1(0x5A); /* pop rdx */ - EMIT1(0x58); /* pop rax */ - - /* mov dst_reg, r11 */ - EMIT_mov(dst_reg, AUX_REG); + if (is64) + EMIT1(add_1mod(0x48, src_reg)); + else if (is_ereg(src_reg)) + EMIT1(add_1mod(0x40, src_reg)); + /* div src_reg */ + EMIT2(0xF7, add_1reg(0xF0, src_reg)); + + if (BPF_OP(insn->code) == BPF_MOD && + dst_reg != BPF_REG_3) + /* mov dst_reg, rdx */ + emit_mov_reg(&prog, is64, dst_reg, BPF_REG_3); + else if (BPF_OP(insn->code) == BPF_DIV && + dst_reg != BPF_REG_0) + /* mov dst_reg, rax */ + emit_mov_reg(&prog, is64, dst_reg, BPF_REG_0); + + if (dst_reg != BPF_REG_3) + EMIT1(0x5A); /* pop rdx */ + if (dst_reg != BPF_REG_0) + EMIT1(0x58); /* pop rax */ break; + } case BPF_ALU | BPF_MUL | BPF_K: case BPF_ALU64 | BPF_MUL | BPF_K: diff --git a/tools/testing/selftests/bpf/verifier/jit.c b/tools/testing/selftests/bpf/verifier/jit.c index eedcb752bf70..79021c30e51e 100644 --- a/tools/testing/selftests/bpf/verifier/jit.c +++ b/tools/testing/selftests/bpf/verifier/jit.c @@ -103,6 +103,53 @@ .retval = 2, }, { + "jit: various div tests", + .insns = { + BPF_LD_IMM64(BPF_REG_2, 0xefeffeULL), + BPF_LD_IMM64(BPF_REG_0, 0xeeff0d413122ULL), + BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL), + BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), + BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LD_IMM64(BPF_REG_3, 0xeeff0d413122ULL), + BPF_ALU64_IMM(BPF_DIV, BPF_REG_3, 0xfefeeeULL), + BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LD_IMM64(BPF_REG_2, 0xaa93ULL), + BPF_ALU64_IMM(BPF_MOD, BPF_REG_1, 0xbeefULL), + BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL), + BPF_LD_IMM64(BPF_REG_3, 0xbeefULL), + BPF_ALU64_REG(BPF_MOD, BPF_REG_1, BPF_REG_3), + BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LD_IMM64(BPF_REG_2, 0x5ee1dULL), + BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL), + BPF_LD_IMM64(BPF_REG_3, 0x2bULL), + BPF_ALU32_REG(BPF_DIV, BPF_REG_1, BPF_REG_3), + BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_ALU32_REG(BPF_DIV, BPF_REG_1, BPF_REG_1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 2, +}, +{ "jit: jsgt, jslt", .insns = { BPF_LD_IMM64(BPF_REG_1, 0x80000000ULL), |