diff options
34 files changed, 2354 insertions, 173 deletions
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 8cd23d8309bf..2e586f579945 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -9,9 +9,11 @@ #include <linux/filter.h> #include <linux/if_vlan.h> #include <linux/bpf.h> +#include <linux/memory.h> #include <asm/extable.h> #include <asm/set_memory.h> #include <asm/nospec-branch.h> +#include <asm/text-patching.h> static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) { @@ -96,6 +98,7 @@ static int bpf_size_to_x86_bytes(int bpf_size) /* Pick a register outside of BPF range for JIT internal work */ #define AUX_REG (MAX_BPF_JIT_REG + 1) +#define X86_REG_R9 (MAX_BPF_JIT_REG + 2) /* * The following table maps BPF registers to x86-64 registers. @@ -104,8 +107,8 @@ static int bpf_size_to_x86_bytes(int bpf_size) * register in load/store instructions, it always needs an * extra byte of encoding and is callee saved. * - * Also x86-64 register R9 is unused. x86-64 register R10 is - * used for blinding (if enabled). + * x86-64 register R9 is not used by BPF programs, but can be used by BPF + * trampoline. x86-64 register R10 is used for blinding (if enabled). */ static const int reg2hex[] = { [BPF_REG_0] = 0, /* RAX */ @@ -121,6 +124,7 @@ static const int reg2hex[] = { [BPF_REG_FP] = 5, /* RBP readonly */ [BPF_REG_AX] = 2, /* R10 temp register */ [AUX_REG] = 3, /* R11 temp register */ + [X86_REG_R9] = 1, /* R9 register, 6th function argument */ }; static const int reg2pt_regs[] = { @@ -148,6 +152,7 @@ static bool is_ereg(u32 reg) BIT(BPF_REG_7) | BIT(BPF_REG_8) | BIT(BPF_REG_9) | + BIT(X86_REG_R9) | BIT(BPF_REG_AX)); } @@ -198,8 +203,10 @@ struct jit_context { /* Maximum number of bytes emitted while JITing one eBPF insn */ #define BPF_MAX_INSN_SIZE 128 #define BPF_INSN_SAFETY 64 +/* number of bytes emit_call() needs to generate call instruction */ +#define X86_CALL_SIZE 5 -#define PROLOGUE_SIZE 20 +#define PROLOGUE_SIZE 25 /* * Emit x86-64 prologue code for BPF program and check its size. @@ -208,8 +215,13 @@ struct jit_context { static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) { u8 *prog = *pprog; - int cnt = 0; + int cnt = X86_CALL_SIZE; + /* BPF trampoline can be made to work without these nops, + * but let's waste 5 bytes for now and optimize later + */ + memcpy(prog, ideal_nops[NOP_ATOMIC5], cnt); + prog += cnt; EMIT1(0x55); /* push rbp */ EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ /* sub rsp, rounded_stack_depth */ @@ -390,6 +402,149 @@ static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg) *pprog = prog; } +/* LDX: dst_reg = *(u8*)(src_reg + off) */ +static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) +{ + u8 *prog = *pprog; + int cnt = 0; + + switch (size) { + case BPF_B: + /* Emit 'movzx rax, byte ptr [rax + off]' */ + EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6); + break; + case BPF_H: + /* Emit 'movzx rax, word ptr [rax + off]' */ + EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7); + break; + case BPF_W: + /* Emit 'mov eax, dword ptr [rax+0x14]' */ + if (is_ereg(dst_reg) || is_ereg(src_reg)) + EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B); + else + EMIT1(0x8B); + break; + case BPF_DW: + /* Emit 'mov rax, qword ptr [rax+0x14]' */ + EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B); + break; + } + /* + * If insn->off == 0 we can save one extra byte, but + * special case of x86 R13 which always needs an offset + * is not worth the hassle + */ + if (is_imm8(off)) + EMIT2(add_2reg(0x40, src_reg, dst_reg), off); + else + EMIT1_off32(add_2reg(0x80, src_reg, dst_reg), off); + *pprog = prog; +} + +/* STX: *(u8*)(dst_reg + off) = src_reg */ +static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) +{ + u8 *prog = *pprog; + int cnt = 0; + + switch (size) { + case BPF_B: + /* Emit 'mov byte ptr [rax + off], al' */ + if (is_ereg(dst_reg) || is_ereg(src_reg) || + /* We have to add extra byte for x86 SIL, DIL regs */ + src_reg == BPF_REG_1 || src_reg == BPF_REG_2) + EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88); + else + EMIT1(0x88); + break; + case BPF_H: + if (is_ereg(dst_reg) || is_ereg(src_reg)) + EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89); + else + EMIT2(0x66, 0x89); + break; + case BPF_W: + if (is_ereg(dst_reg) || is_ereg(src_reg)) + EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89); + else + EMIT1(0x89); + break; + case BPF_DW: + EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89); + break; + } + if (is_imm8(off)) + EMIT2(add_2reg(0x40, dst_reg, src_reg), off); + else + EMIT1_off32(add_2reg(0x80, dst_reg, src_reg), off); + *pprog = prog; +} + +static int emit_call(u8 **pprog, void *func, void *ip) +{ + u8 *prog = *pprog; + int cnt = 0; + s64 offset; + + offset = func - (ip + X86_CALL_SIZE); + if (!is_simm32(offset)) { + pr_err("Target call %p is out of range\n", func); + return -EINVAL; + } + EMIT1_off32(0xE8, offset); + *pprog = prog; + return 0; +} + +int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, + void *old_addr, void *new_addr) +{ + u8 old_insn[X86_CALL_SIZE] = {}; + u8 new_insn[X86_CALL_SIZE] = {}; + u8 *prog; + int ret; + + if (!is_kernel_text((long)ip) && + !is_bpf_text_address((long)ip)) + /* BPF trampoline in modules is not supported */ + return -EINVAL; + + if (old_addr) { + prog = old_insn; + ret = emit_call(&prog, old_addr, (void *)ip); + if (ret) + return ret; + } + if (new_addr) { + prog = new_insn; + ret = emit_call(&prog, new_addr, (void *)ip); + if (ret) + return ret; + } + ret = -EBUSY; + mutex_lock(&text_mutex); + switch (t) { + case BPF_MOD_NOP_TO_CALL: + if (memcmp(ip, ideal_nops[NOP_ATOMIC5], X86_CALL_SIZE)) + goto out; + text_poke_bp(ip, new_insn, X86_CALL_SIZE, NULL); + break; + case BPF_MOD_CALL_TO_CALL: + if (memcmp(ip, old_insn, X86_CALL_SIZE)) + goto out; + text_poke_bp(ip, new_insn, X86_CALL_SIZE, NULL); + break; + case BPF_MOD_CALL_TO_NOP: + if (memcmp(ip, old_insn, X86_CALL_SIZE)) + goto out; + text_poke_bp(ip, ideal_nops[NOP_ATOMIC5], X86_CALL_SIZE, NULL); + break; + } + ret = 0; +out: + mutex_unlock(&text_mutex); + return ret; +} static bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs, int trapnr, @@ -773,68 +928,22 @@ st: if (is_imm8(insn->off)) /* STX: *(u8*)(dst_reg + off) = src_reg */ case BPF_STX | BPF_MEM | BPF_B: - /* Emit 'mov byte ptr [rax + off], al' */ - if (is_ereg(dst_reg) || is_ereg(src_reg) || - /* We have to add extra byte for x86 SIL, DIL regs */ - src_reg == BPF_REG_1 || src_reg == BPF_REG_2) - EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88); - else - EMIT1(0x88); - goto stx; case BPF_STX | BPF_MEM | BPF_H: - if (is_ereg(dst_reg) || is_ereg(src_reg)) - EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89); - else - EMIT2(0x66, 0x89); - goto stx; case BPF_STX | BPF_MEM | BPF_W: - if (is_ereg(dst_reg) || is_ereg(src_reg)) - EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89); - else - EMIT1(0x89); - goto stx; case BPF_STX | BPF_MEM | BPF_DW: - EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89); -stx: if (is_imm8(insn->off)) - EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off); - else - EMIT1_off32(add_2reg(0x80, dst_reg, src_reg), - insn->off); + emit_stx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off); break; /* LDX: dst_reg = *(u8*)(src_reg + off) */ case BPF_LDX | BPF_MEM | BPF_B: case BPF_LDX | BPF_PROBE_MEM | BPF_B: - /* Emit 'movzx rax, byte ptr [rax + off]' */ - EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6); - goto ldx; case BPF_LDX | BPF_MEM | BPF_H: case BPF_LDX | BPF_PROBE_MEM | BPF_H: - /* Emit 'movzx rax, word ptr [rax + off]' */ - EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7); - goto ldx; case BPF_LDX | BPF_MEM | BPF_W: case BPF_LDX | BPF_PROBE_MEM | BPF_W: - /* Emit 'mov eax, dword ptr [rax+0x14]' */ - if (is_ereg(dst_reg) || is_ereg(src_reg)) - EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B); - else - EMIT1(0x8B); - goto ldx; case BPF_LDX | BPF_MEM | BPF_DW: case BPF_LDX | BPF_PROBE_MEM | BPF_DW: - /* Emit 'mov rax, qword ptr [rax+0x14]' */ - EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B); -ldx: /* - * If insn->off == 0 we can save one extra byte, but - * special case of x86 R13 which always needs an offset - * is not worth the hassle - */ - if (is_imm8(insn->off)) - EMIT2(add_2reg(0x40, src_reg, dst_reg), insn->off); - else - EMIT1_off32(add_2reg(0x80, src_reg, dst_reg), - insn->off); + emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off); if (BPF_MODE(insn->code) == BPF_PROBE_MEM) { struct exception_table_entry *ex; u8 *_insn = image + proglen; @@ -899,13 +1008,8 @@ xadd: if (is_imm8(insn->off)) /* call */ case BPF_JMP | BPF_CALL: func = (u8 *) __bpf_call_base + imm32; - jmp_offset = func - (image + addrs[i]); - if (!imm32 || !is_simm32(jmp_offset)) { - pr_err("unsupported BPF func %d addr %p image %p\n", - imm32, func, image); + if (!imm32 || emit_call(&prog, func, image + addrs[i - 1])) return -EINVAL; - } - EMIT1_off32(0xE8, jmp_offset); break; case BPF_JMP | BPF_TAIL_CALL: @@ -1138,6 +1242,210 @@ emit_jmp: return proglen; } +static void save_regs(struct btf_func_model *m, u8 **prog, int nr_args, + int stack_size) +{ + int i; + /* Store function arguments to stack. + * For a function that accepts two pointers the sequence will be: + * mov QWORD PTR [rbp-0x10],rdi + * mov QWORD PTR [rbp-0x8],rsi + */ + for (i = 0; i < min(nr_args, 6); i++) + emit_stx(prog, bytes_to_bpf_size(m->arg_size[i]), + BPF_REG_FP, + i == 5 ? X86_REG_R9 : BPF_REG_1 + i, + -(stack_size - i * 8)); +} + +static void restore_regs(struct btf_func_model *m, u8 **prog, int nr_args, + int stack_size) +{ + int i; + + /* Restore function arguments from stack. + * For a function that accepts two pointers the sequence will be: + * EMIT4(0x48, 0x8B, 0x7D, 0xF0); mov rdi,QWORD PTR [rbp-0x10] + * EMIT4(0x48, 0x8B, 0x75, 0xF8); mov rsi,QWORD PTR [rbp-0x8] + */ + for (i = 0; i < min(nr_args, 6); i++) + emit_ldx(prog, bytes_to_bpf_size(m->arg_size[i]), + i == 5 ? X86_REG_R9 : BPF_REG_1 + i, + BPF_REG_FP, + -(stack_size - i * 8)); +} + +static int invoke_bpf(struct btf_func_model *m, u8 **pprog, + struct bpf_prog **progs, int prog_cnt, int stack_size) +{ + u8 *prog = *pprog; + int cnt = 0, i; + + for (i = 0; i < prog_cnt; i++) { + if (emit_call(&prog, __bpf_prog_enter, prog)) + return -EINVAL; + /* remember prog start time returned by __bpf_prog_enter */ + emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0); + + /* arg1: lea rdi, [rbp - stack_size] */ + EMIT4(0x48, 0x8D, 0x7D, -stack_size); + /* arg2: progs[i]->insnsi for interpreter */ + if (!progs[i]->jited) + emit_mov_imm64(&prog, BPF_REG_2, + (long) progs[i]->insnsi >> 32, + (u32) (long) progs[i]->insnsi); + /* call JITed bpf program or interpreter */ + if (emit_call(&prog, progs[i]->bpf_func, prog)) + return -EINVAL; + + /* arg1: mov rdi, progs[i] */ + emit_mov_imm64(&prog, BPF_REG_1, (long) progs[i] >> 32, + (u32) (long) progs[i]); + /* arg2: mov rsi, rbx <- start time in nsec */ + emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6); + if (emit_call(&prog, __bpf_prog_exit, prog)) + return -EINVAL; + } + *pprog = prog; + return 0; +} + +/* Example: + * __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); + * its 'struct btf_func_model' will be nr_args=2 + * The assembly code when eth_type_trans is executing after trampoline: + * + * push rbp + * mov rbp, rsp + * sub rsp, 16 // space for skb and dev + * push rbx // temp regs to pass start time + * mov qword ptr [rbp - 16], rdi // save skb pointer to stack + * mov qword ptr [rbp - 8], rsi // save dev pointer to stack + * call __bpf_prog_enter // rcu_read_lock and preempt_disable + * mov rbx, rax // remember start time in bpf stats are enabled + * lea rdi, [rbp - 16] // R1==ctx of bpf prog + * call addr_of_jited_FENTRY_prog + * movabsq rdi, 64bit_addr_of_struct_bpf_prog // unused if bpf stats are off + * mov rsi, rbx // prog start time + * call __bpf_prog_exit // rcu_read_unlock, preempt_enable and stats math + * mov rdi, qword ptr [rbp - 16] // restore skb pointer from stack + * mov rsi, qword ptr [rbp - 8] // restore dev pointer from stack + * pop rbx + * leave + * ret + * + * eth_type_trans has 5 byte nop at the beginning. These 5 bytes will be + * replaced with 'call generated_bpf_trampoline'. When it returns + * eth_type_trans will continue executing with original skb and dev pointers. + * + * The assembly code when eth_type_trans is called from trampoline: + * + * push rbp + * mov rbp, rsp + * sub rsp, 24 // space for skb, dev, return value + * push rbx // temp regs to pass start time + * mov qword ptr [rbp - 24], rdi // save skb pointer to stack + * mov qword ptr [rbp - 16], rsi // save dev pointer to stack + * call __bpf_prog_enter // rcu_read_lock and preempt_disable + * mov rbx, rax // remember start time if bpf stats are enabled + * lea rdi, [rbp - 24] // R1==ctx of bpf prog + * call addr_of_jited_FENTRY_prog // bpf prog can access skb and dev + * movabsq rdi, 64bit_addr_of_struct_bpf_prog // unused if bpf stats are off + * mov rsi, rbx // prog start time + * call __bpf_prog_exit // rcu_read_unlock, preempt_enable and stats math + * mov rdi, qword ptr [rbp - 24] // restore skb pointer from stack + * mov rsi, qword ptr [rbp - 16] // restore dev pointer from stack + * call eth_type_trans+5 // execute body of eth_type_trans + * mov qword ptr [rbp - 8], rax // save return value + * call __bpf_prog_enter // rcu_read_lock and preempt_disable + * mov rbx, rax // remember start time in bpf stats are enabled + * lea rdi, [rbp - 24] // R1==ctx of bpf prog + * call addr_of_jited_FEXIT_prog // bpf prog can access skb, dev, return value + * movabsq rdi, 64bit_addr_of_struct_bpf_prog // unused if bpf stats are off + * mov rsi, rbx // prog start time + * call __bpf_prog_exit // rcu_read_unlock, preempt_enable and stats math + * mov rax, qword ptr [rbp - 8] // restore eth_type_trans's return value + * pop rbx + * leave + * add rsp, 8 // skip eth_type_trans's frame + * ret // return to its caller + */ +int arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags, + struct bpf_prog **fentry_progs, int fentry_cnt, + struct bpf_prog **fexit_progs, int fexit_cnt, + void *orig_call) +{ + int cnt = 0, nr_args = m->nr_args; + int stack_size = nr_args * 8; + u8 *prog; + + /* x86-64 supports up to 6 arguments. 7+ can be added in the future */ + if (nr_args > 6) + return -ENOTSUPP; + + if ((flags & BPF_TRAMP_F_RESTORE_REGS) && + (flags & BPF_TRAMP_F_SKIP_FRAME)) + return -EINVAL; + + if (flags & BPF_TRAMP_F_CALL_ORIG) + stack_size += 8; /* room for return value of orig_call */ + + if (flags & BPF_TRAMP_F_SKIP_FRAME) + /* skip patched call instruction and point orig_call to actual + * body of the kernel function. + */ + orig_call += X86_CALL_SIZE; + + prog = image; + + EMIT1(0x55); /* push rbp */ + EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ + EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */ + EMIT1(0x53); /* push rbx */ + + save_regs(m, &prog, nr_args, stack_size); + + if (fentry_cnt) + if (invoke_bpf(m, &prog, fentry_progs, fentry_cnt, stack_size)) + return -EINVAL; + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + if (fentry_cnt) + restore_regs(m, &prog, nr_args, stack_size); + + /* call original function */ + if (emit_call(&prog, orig_call, prog)) + return -EINVAL; + /* remember return value in a stack for bpf prog to access */ + emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); + } + + if (fexit_cnt) + if (invoke_bpf(m, &prog, fexit_progs, fexit_cnt, stack_size)) + return -EINVAL; + + if (flags & BPF_TRAMP_F_RESTORE_REGS) + restore_regs(m, &prog, nr_args, stack_size); + + if (flags & BPF_TRAMP_F_CALL_ORIG) + /* restore original return value back into RAX */ + emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8); + + EMIT1(0x5B); /* pop rbx */ + EMIT1(0xC9); /* leave */ + if (flags & BPF_TRAMP_F_SKIP_FRAME) + /* skip our return address and return to parent */ + EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */ + EMIT1(0xC3); /* ret */ + /* One half of the page has active running trampoline. + * Another half is an area for next trampoline. + * Make sure the trampoline generation logic doesn't overflow. + */ + if (WARN_ON_ONCE(prog - (u8 *)image > PAGE_SIZE / 2 - BPF_INSN_SAFETY)) + return -EFAULT; + return 0; +} + struct x64_jit_data { struct bpf_binary_header *header; int *addrs; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 7c7f518811a6..5b81cde47314 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -14,6 +14,8 @@ #include <linux/numa.h> #include <linux/wait.h> #include <linux/u64_stats_sync.h> +#include <linux/refcount.h> +#include <linux/mutex.h> struct bpf_verifier_env; struct bpf_verifier_log; @@ -246,7 +248,7 @@ struct bpf_func_proto { }; enum bpf_arg_type arg_type[5]; }; - u32 *btf_id; /* BTF ids of arguments */ + int *btf_id; /* BTF ids of arguments */ }; /* bpf_context is intentionally undefined structure. Pointer to bpf_context is @@ -384,6 +386,104 @@ struct bpf_prog_stats { struct u64_stats_sync syncp; } __aligned(2 * sizeof(u64)); +struct btf_func_model { + u8 ret_size; + u8 nr_args; + u8 arg_size[MAX_BPF_FUNC_ARGS]; +}; + +/* Restore arguments before returning from trampoline to let original function + * continue executing. This flag is used for fentry progs when there are no + * fexit progs. + */ +#define BPF_TRAMP_F_RESTORE_REGS BIT(0) +/* Call original function after fentry progs, but before fexit progs. + * Makes sense for fentry/fexit, normal calls and indirect calls. + */ +#define BPF_TRAMP_F_CALL_ORIG BIT(1) +/* Skip current frame and return to parent. Makes sense for fentry/fexit + * programs only. Should not be used with normal calls and indirect calls. + */ +#define BPF_TRAMP_F_SKIP_FRAME BIT(2) + +/* Different use cases for BPF trampoline: + * 1. replace nop at the function entry (kprobe equivalent) + * flags = BPF_TRAMP_F_RESTORE_REGS + * fentry = a set of programs to run before returning from trampoline + * + * 2. replace nop at the function entry (kprobe + kretprobe equivalent) + * flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME + * orig_call = fentry_ip + MCOUNT_INSN_SIZE + * fentry = a set of program to run before calling original function + * fexit = a set of program to run after original function + * + * 3. replace direct call instruction anywhere in the function body + * or assign a function pointer for indirect call (like tcp_congestion_ops->cong_avoid) + * With flags = 0 + * fentry = a set of programs to run before returning from trampoline + * With flags = BPF_TRAMP_F_CALL_ORIG + * orig_call = original callback addr or direct function addr + * fentry = a set of program to run before calling original function + * fexit = a set of program to run after original function + */ +int arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags, + struct bpf_prog **fentry_progs, int fentry_cnt, + struct bpf_prog **fexit_progs, int fexit_cnt, + void *orig_call); +/* these two functions are called from generated trampoline */ +u64 notrace __bpf_prog_enter(void); +void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start); + +enum bpf_tramp_prog_type { + BPF_TRAMP_FENTRY, + BPF_TRAMP_FEXIT, + BPF_TRAMP_MAX +}; + +struct bpf_trampoline { + /* hlist for trampoline_table */ + struct hlist_node hlist; + /* serializes access to fields of this trampoline */ + struct mutex mutex; + refcount_t refcnt; + u64 key; + struct { + struct btf_func_model model; + void *addr; + } func; + /* list of BPF programs using this trampoline */ + struct hlist_head progs_hlist[BPF_TRAMP_MAX]; + /* Number of attached programs. A counter per kind. */ + int progs_cnt[BPF_TRAMP_MAX]; + /* Executable image of trampoline */ + void *image; + u64 selector; +}; +#ifdef CONFIG_BPF_JIT +struct bpf_trampoline *bpf_trampoline_lookup(u64 key); +int bpf_trampoline_link_prog(struct bpf_prog *prog); +int bpf_trampoline_unlink_prog(struct bpf_prog *prog); +void bpf_trampoline_put(struct bpf_trampoline *tr); +#else +static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key) +{ + return NULL; +} +static inline int bpf_trampoline_link_prog(struct bpf_prog *prog) +{ + return -ENOTSUPP; +} +static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog) +{ + return -ENOTSUPP; +} +static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {} +#endif + +struct bpf_func_info_aux { + bool unreliable; +}; + struct bpf_prog_aux { atomic_t refcnt; u32 used_map_cnt; @@ -395,9 +495,14 @@ struct bpf_prog_aux { u32 func_cnt; /* used by non-func prog as the number of func progs */ u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */ u32 attach_btf_id; /* in-kernel BTF type id to attach to */ + struct bpf_prog *linked_prog; bool verifier_zext; /* Zero extensions has been inserted by verifier. */ bool offload_requested; bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */ + bool func_proto_unreliable; + enum bpf_tramp_prog_type trampoline_prog_type; + struct bpf_trampoline *trampoline; + struct hlist_node tramp_hlist; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; /* function name for valid attach_btf_id */ @@ -419,6 +524,7 @@ struct bpf_prog_aux { struct bpf_prog_offload *offload; struct btf *btf; struct bpf_func_info *func_info; + struct bpf_func_info_aux *func_info_aux; /* bpf_line_info loaded from userspace. linfo->insn_off * has the xlated insn offset. * Both the main and sub prog share the same linfo. @@ -648,7 +754,7 @@ DECLARE_PER_CPU(int, bpf_prog_active); extern const struct file_operations bpf_map_fops; extern const struct file_operations bpf_prog_fops; -#define BPF_PROG_TYPE(_id, _name) \ +#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ extern const struct bpf_prog_ops _name ## _prog_ops; \ extern const struct bpf_verifier_ops _name ## _verifier_ops; #define BPF_MAP_TYPE(_id, _ops) \ @@ -782,7 +888,16 @@ int btf_struct_access(struct bpf_verifier_log *log, const struct btf_type *t, int off, int size, enum bpf_access_type atype, u32 *next_btf_id); -u32 btf_resolve_helper_id(struct bpf_verifier_log *log, void *, int); +int btf_resolve_helper_id(struct bpf_verifier_log *log, + const struct bpf_func_proto *fn, int); + +int btf_distill_func_proto(struct bpf_verifier_log *log, + struct btf *btf, + const struct btf_type *func_proto, + const char *func_name, + struct btf_func_model *m); + +int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog); #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) @@ -1107,6 +1222,15 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, #endif #ifdef CONFIG_INET +struct sk_reuseport_kern { + struct sk_buff *skb; + struct sock *sk; + struct sock *selected_sk; + void *data_end; + u32 hash; + u32 reuseport_id; + bool bind_inany; +}; bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info); @@ -1157,4 +1281,12 @@ static inline u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type, } #endif /* CONFIG_INET */ +enum bpf_text_poke_type { + BPF_MOD_NOP_TO_CALL, + BPF_MOD_CALL_TO_CALL, + BPF_MOD_CALL_TO_NOP, +}; +int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, + void *addr1, void *addr2); + #endif /* _LINUX_BPF_H */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index de14872b01ba..93740b3614d7 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -2,42 +2,68 @@ /* internal file - do not include directly */ #ifdef CONFIG_NET -BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_FILTER, sk_filter) -BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_CLS, tc_cls_act) -BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act) -BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp) +BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_FILTER, sk_filter, + struct __sk_buff, struct sk_buff) +BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_CLS, tc_cls_act, + struct __sk_buff, struct sk_buff) +BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act, + struct __sk_buff, struct sk_buff) +BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp, + struct xdp_md, struct xdp_buff) #ifdef CONFIG_CGROUP_BPF -BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb) -BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock) -BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr) +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb, + struct __sk_buff, struct sk_buff) +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock, + struct bpf_sock, struct sock) +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr, + struct bpf_sock_addr, struct bpf_sock_addr_kern) #endif -BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_in) -BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_out) -BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit) -BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_SEG6LOCAL, lwt_seg6local) -BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb) -BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg) -BPF_PROG_TYPE(BPF_PROG_TYPE_FLOW_DISSECTOR, flow_dissector) +BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_in, + struct __sk_buff, struct sk_buff) +BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_out, + struct __sk_buff, struct sk_buff) +BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit, + struct __sk_buff, struct sk_buff) +BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_SEG6LOCAL, lwt_seg6local, + struct __sk_buff, struct sk_buff) +BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops, + struct bpf_sock_ops, struct bpf_sock_ops_kern) +BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb, + struct __sk_buff, struct sk_buff) +BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg, + struct sk_msg_md, struct sk_msg) +BPF_PROG_TYPE(BPF_PROG_TYPE_FLOW_DISSECTOR, flow_dissector, + struct __sk_buff, struct bpf_flow_dissector) #endif #ifdef CONFIG_BPF_EVENTS -BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe) -BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint) -BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event) -BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint) -BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable) -BPF_PROG_TYPE(BPF_PROG_TYPE_TRACING, tracing) +BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe, + bpf_user_pt_regs_t, struct pt_regs) +BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint, + __u64, u64) +BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event, + struct bpf_perf_event_data, struct bpf_perf_event_data_kern) +BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint, + struct bpf_raw_tracepoint_args, u64) +BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable, + struct bpf_raw_tracepoint_args, u64) +BPF_PROG_TYPE(BPF_PROG_TYPE_TRACING, tracing, + void *, void *) #endif #ifdef CONFIG_CGROUP_BPF -BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev) -BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SYSCTL, cg_sysctl) -BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCKOPT, cg_sockopt) +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev, + struct bpf_cgroup_dev_ctx, struct bpf_cgroup_dev_ctx) +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SYSCTL, cg_sysctl, + struct bpf_sysctl, struct bpf_sysctl_kern) +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCKOPT, cg_sockopt, + struct bpf_sockopt, struct bpf_sockopt_kern) #endif #ifdef CONFIG_BPF_LIRC_MODE2 -BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2) +BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2, + __u32, u32) #endif #ifdef CONFIG_INET -BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport) +BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport, + struct sk_reuseport_md, struct sk_reuseport_kern) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 6e7284ea1468..cdd08bf0ec06 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -343,6 +343,7 @@ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log) #define BPF_MAX_SUBPROGS 256 struct bpf_subprog_info { + /* 'start' has to be the first field otherwise find_subprog() won't work */ u32 start; /* insn idx of function entry point */ u32 linfo_idx; /* The idx to the main_prog->aux->linfo */ u16 stack_depth; /* max. stack depth used by this function */ diff --git a/include/linux/btf.h b/include/linux/btf.h index 9dee00859c5f..79d4abc2556a 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -88,6 +88,7 @@ static inline bool btf_type_is_func_proto(const struct btf_type *t) const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); const char *btf_name_by_offset(const struct btf *btf, u32 offset); struct btf *btf_parse_vmlinux(void); +struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog); #else static inline const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index df6809a76404..4842a134b202 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -201,6 +201,8 @@ enum bpf_attach_type { BPF_CGROUP_GETSOCKOPT, BPF_CGROUP_SETSOCKOPT, BPF_TRACE_RAW_TP, + BPF_TRACE_FENTRY, + BPF_TRACE_FEXIT, __MAX_BPF_ATTACH_TYPE }; @@ -423,6 +425,7 @@ union bpf_attr { __aligned_u64 line_info; /* line info */ __u32 line_info_cnt; /* number of bpf_line_info records */ __u32 attach_btf_id; /* in-kernel BTF type id to attach to */ + __u32 attach_prog_fd; /* 0 to attach to vmlinux */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index e1d9adb212f9..3f671bf617e8 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o obj-$(CONFIG_BPF_SYSCALL) += disasm.o +obj-$(CONFIG_BPF_JIT) += trampoline.o obj-$(CONFIG_BPF_SYSCALL) += btf.o ifeq ($(CONFIG_NET),y) obj-$(CONFIG_BPF_SYSCALL) += devmap.o diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 4639c4ba9a9b..40efde5eedcb 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -2,6 +2,8 @@ /* Copyright (c) 2018 Facebook */ #include <uapi/linux/btf.h> +#include <uapi/linux/bpf.h> +#include <uapi/linux/bpf_perf_event.h> #include <uapi/linux/types.h> #include <linux/seq_file.h> #include <linux/compiler.h> @@ -16,6 +18,9 @@ #include <linux/sort.h> #include <linux/bpf_verifier.h> #include <linux/btf.h> +#include <linux/skmsg.h> +#include <linux/perf_event.h> +#include <net/sock.h> /* BTF (BPF Type Format) is the meta data format which describes * the data types of BPF program/map. Hence, it basically focus @@ -3439,13 +3444,112 @@ errout: extern char __weak _binary__btf_vmlinux_bin_start[]; extern char __weak _binary__btf_vmlinux_bin_end[]; +extern struct btf *btf_vmlinux; + +#define BPF_MAP_TYPE(_id, _ops) +static union { + struct bpf_ctx_convert { +#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ + prog_ctx_type _id##_prog; \ + kern_ctx_type _id##_kern; +#include <linux/bpf_types.h> +#undef BPF_PROG_TYPE + } *__t; + /* 't' is written once under lock. Read many times. */ + const struct btf_type *t; +} bpf_ctx_convert; +enum { +#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ + __ctx_convert##_id, +#include <linux/bpf_types.h> +#undef BPF_PROG_TYPE +}; +static u8 bpf_ctx_convert_map[] = { +#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ + [_id] = __ctx_convert##_id, +#include <linux/bpf_types.h> +#undef BPF_PROG_TYPE +}; +#undef BPF_MAP_TYPE + +static const struct btf_member * +btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf, + const struct btf_type *t, enum bpf_prog_type prog_type) +{ + const struct btf_type *conv_struct; + const struct btf_type *ctx_struct; + const struct btf_member *ctx_type; + const char *tname, *ctx_tname; + + conv_struct = bpf_ctx_convert.t; + if (!conv_struct) { + bpf_log(log, "btf_vmlinux is malformed\n"); + return NULL; + } + t = btf_type_by_id(btf, t->type); + while (btf_type_is_modifier(t)) + t = btf_type_by_id(btf, t->type); + if (!btf_type_is_struct(t)) { + /* Only pointer to struct is supported for now. + * That means that BPF_PROG_TYPE_TRACEPOINT with BTF + * is not supported yet. + * BPF_PROG_TYPE_RAW_TRACEPOINT is fine. + */ + bpf_log(log, "BPF program ctx type is not a struct\n"); + return NULL; + } + tname = btf_name_by_offset(btf, t->name_off); + if (!tname) { + bpf_log(log, "BPF program ctx struct doesn't have a name\n"); + return NULL; + } + /* prog_type is valid bpf program type. No need for bounds check. */ + ctx_type = btf_type_member(conv_struct) + bpf_ctx_convert_map[prog_type] * 2; + /* ctx_struct is a pointer to prog_ctx_type in vmlinux. + * Like 'struct __sk_buff' + */ + ctx_struct = btf_type_by_id(btf_vmlinux, ctx_type->type); + if (!ctx_struct) + /* should not happen */ + return NULL; + ctx_tname = btf_name_by_offset(btf_vmlinux, ctx_struct->name_off); + if (!ctx_tname) { + /* should not happen */ + bpf_log(log, "Please fix kernel include/linux/bpf_types.h\n"); + return NULL; + } + /* only compare that prog's ctx type name is the same as + * kernel expects. No need to compare field by field. + * It's ok for bpf prog to do: + * struct __sk_buff {}; + * int socket_filter_bpf_prog(struct __sk_buff *skb) + * { // no fields of skb are ever used } + */ + if (strcmp(ctx_tname, tname)) + return NULL; + return ctx_type; +} + +static int btf_translate_to_vmlinux(struct bpf_verifier_log *log, + struct btf *btf, + const struct btf_type *t, + enum bpf_prog_type prog_type) +{ + const struct btf_member *prog_ctx_type, *kern_ctx_type; + + prog_ctx_type = btf_get_prog_ctx_type(log, btf, t, prog_type); + if (!prog_ctx_type) + return -ENOENT; + kern_ctx_type = prog_ctx_type + 1; + return kern_ctx_type->type; +} struct btf *btf_parse_vmlinux(void) { struct btf_verifier_env *env = NULL; struct bpf_verifier_log *log; struct btf *btf = NULL; - int err; + int err, i; env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN); if (!env) @@ -3479,6 +3583,26 @@ struct btf *btf_parse_vmlinux(void) if (err) goto errout; + /* find struct bpf_ctx_convert for type checking later */ + for (i = 1; i <= btf->nr_types; i++) { + const struct btf_type *t; + const char *tname; + + t = btf_type_by_id(btf, i); + if (!__btf_type_is_struct(t)) + continue; + tname = __btf_name_by_offset(btf, t->name_off); + if (!strcmp(tname, "bpf_ctx_convert")) { + /* btf_parse_vmlinux() runs under bpf_verifier_lock */ + bpf_ctx_convert.t = t; + break; + } + } + if (i > btf->nr_types) { + err = -ENOENT; + goto errout; + } + btf_verifier_env_free(env); refcount_set(&btf->refcnt, 1); return btf; @@ -3492,17 +3616,29 @@ errout: return ERR_PTR(err); } -extern struct btf *btf_vmlinux; +struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog) +{ + struct bpf_prog *tgt_prog = prog->aux->linked_prog; + + if (tgt_prog) { + return tgt_prog->aux->btf; + } else { + return btf_vmlinux; + } +} bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { const struct btf_type *t = prog->aux->attach_func_proto; + struct bpf_prog *tgt_prog = prog->aux->linked_prog; + struct btf *btf = bpf_prog_get_target_btf(prog); const char *tname = prog->aux->attach_func_name; struct bpf_verifier_log *log = info->log; const struct btf_param *args; u32 nr_args, arg; + int ret; if (off % 8) { bpf_log(log, "func '%s' offset %d is not multiple of 8\n", @@ -3511,22 +3647,34 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, } arg = off / 8; args = (const struct btf_param *)(t + 1); - nr_args = btf_type_vlen(t); + /* if (t == NULL) Fall back to default BPF prog with 5 u64 arguments */ + nr_args = t ? btf_type_vlen(t) : 5; if (prog->aux->attach_btf_trace) { /* skip first 'void *__data' argument in btf_trace_##name typedef */ args++; nr_args--; } - if (arg >= nr_args) { + + if (prog->expected_attach_type == BPF_TRACE_FEXIT && + arg == nr_args) { + if (!t) + /* Default prog with 5 args. 6th arg is retval. */ + return true; + /* function return type */ + t = btf_type_by_id(btf, t->type); + } else if (arg >= nr_args) { bpf_log(log, "func '%s' doesn't have %d-th argument\n", - tname, arg); + tname, arg + 1); return false; + } else { + if (!t) + /* Default prog with 5 args */ + return true; + t = btf_type_by_id(btf, args[arg].type); } - - t = btf_type_by_id(btf_vmlinux, args[arg].type); /* skip modifiers */ while (btf_type_is_modifier(t)) - t = btf_type_by_id(btf_vmlinux, t->type); + t = btf_type_by_id(btf, t->type); if (btf_type_is_int(t)) /* accessing a scalar */ return true; @@ -3534,7 +3682,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, bpf_log(log, "func '%s' arg%d '%s' has type %s. Only pointer access is allowed\n", tname, arg, - __btf_name_by_offset(btf_vmlinux, t->name_off), + __btf_name_by_offset(btf, t->name_off), btf_kind_str[BTF_INFO_KIND(t->info)]); return false; } @@ -3549,10 +3697,19 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, info->reg_type = PTR_TO_BTF_ID; info->btf_id = t->type; - t = btf_type_by_id(btf_vmlinux, t->type); + if (tgt_prog) { + ret = btf_translate_to_vmlinux(log, btf, t, tgt_prog->type); + if (ret > 0) { + info->btf_id = ret; + return true; + } else { + return false; + } + } + t = btf_type_by_id(btf, t->type); /* skip modifiers */ while (btf_type_is_modifier(t)) - t = btf_type_by_id(btf_vmlinux, t->type); + t = btf_type_by_id(btf, t->type); if (!btf_type_is_struct(t)) { bpf_log(log, "func '%s' arg%d type %s is not a struct\n", @@ -3561,7 +3718,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, } bpf_log(log, "func '%s' arg%d has btf_id %d type %s '%s'\n", tname, arg, info->btf_id, btf_kind_str[BTF_INFO_KIND(t->info)], - __btf_name_by_offset(btf_vmlinux, t->name_off)); + __btf_name_by_offset(btf, t->name_off)); return true; } @@ -3716,7 +3873,8 @@ again: return -EINVAL; } -u32 btf_resolve_helper_id(struct bpf_verifier_log *log, void *fn, int arg) +static int __btf_resolve_helper_id(struct bpf_verifier_log *log, void *fn, + int arg) { char fnname[KSYM_SYMBOL_LEN + 4] = "btf_"; const struct btf_param *args; @@ -3784,6 +3942,185 @@ u32 btf_resolve_helper_id(struct bpf_verifier_log *log, void *fn, int arg) return btf_id; } +int btf_resolve_helper_id(struct bpf_verifier_log *log, + const struct bpf_func_proto *fn, int arg) +{ + int *btf_id = &fn->btf_id[arg]; + int ret; + + if (fn->arg_type[arg] != ARG_PTR_TO_BTF_ID) + return -EINVAL; + + ret = READ_ONCE(*btf_id); + if (ret) + return ret; + /* ok to race the search. The result is the same */ + ret = __btf_resolve_helper_id(log, fn->func, arg); + if (!ret) { + /* Function argument cannot be type 'void' */ + bpf_log(log, "BTF resolution bug\n"); + return -EFAULT; + } + WRITE_ONCE(*btf_id, ret); + return ret; +} + +static int __get_type_size(struct btf *btf, u32 btf_id, + const struct btf_type **bad_type) +{ + const struct btf_type *t; + + if (!btf_id) + /* void */ + return 0; + t = btf_type_by_id(btf, btf_id); + while (t && btf_type_is_modifier(t)) + t = btf_type_by_id(btf, t->type); + if (!t) + return -EINVAL; + if (btf_type_is_ptr(t)) + /* kernel size of pointer. Not BPF's size of pointer*/ + return sizeof(void *); + if (btf_type_is_int(t) || btf_type_is_enum(t)) + return t->size; + *bad_type = t; + return -EINVAL; +} + +int btf_distill_func_proto(struct bpf_verifier_log *log, + struct btf *btf, + const struct btf_type *func, + const char *tname, + struct btf_func_model *m) +{ + const struct btf_param *args; + const struct btf_type *t; + u32 i, nargs; + int ret; + + if (!func) { + /* BTF function prototype doesn't match the verifier types. + * Fall back to 5 u64 args. + */ + for (i = 0; i < 5; i++) + m->arg_size[i] = 8; + m->ret_size = 8; + m->nr_args = 5; + return 0; + } + args = (const struct btf_param *)(func + 1); + nargs = btf_type_vlen(func); + if (nargs >= MAX_BPF_FUNC_ARGS) { + bpf_log(log, + "The function %s has %d arguments. Too many.\n", + tname, nargs); + return -EINVAL; + } + ret = __get_type_size(btf, func->type, &t); + if (ret < 0) { + bpf_log(log, + "The function %s return type %s is unsupported.\n", + tname, btf_kind_str[BTF_INFO_KIND(t->info)]); + return -EINVAL; + } + m->ret_size = ret; + + for (i = 0; i < nargs; i++) { + ret = __get_type_size(btf, args[i].type, &t); + if (ret < 0) { + bpf_log(log, + "The function %s arg%d type %s is unsupported.\n", + tname, i, btf_kind_str[BTF_INFO_KIND(t->info)]); + return -EINVAL; + } + m->arg_size[i] = ret; + } + m->nr_args = nargs; + return 0; +} + +int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog) +{ + struct bpf_verifier_state *st = env->cur_state; + struct bpf_func_state *func = st->frame[st->curframe]; + struct bpf_reg_state *reg = func->regs; + struct bpf_verifier_log *log = &env->log; + struct bpf_prog *prog = env->prog; + struct btf *btf = prog->aux->btf; + const struct btf_param *args; + const struct btf_type *t; + u32 i, nargs, btf_id; + const char *tname; + + if (!prog->aux->func_info) + return 0; + + btf_id = prog->aux->func_info[subprog].type_id; + if (!btf_id) + return 0; + + if (prog->aux->func_info_aux[subprog].unreliable) + return 0; + + t = btf_type_by_id(btf, btf_id); + if (!t || !btf_type_is_func(t)) { + bpf_log(log, "BTF of subprog %d doesn't point to KIND_FUNC\n", + subprog); + return -EINVAL; + } + tname = btf_name_by_offset(btf, t->name_off); + + t = btf_type_by_id(btf, t->type); + if (!t || !btf_type_is_func_proto(t)) { + bpf_log(log, "Invalid type of func %s\n", tname); + return -EINVAL; + } + args = (const struct btf_param *)(t + 1); + nargs = btf_type_vlen(t); + if (nargs > 5) { + bpf_log(log, "Function %s has %d > 5 args\n", tname, nargs); + goto out; + } + /* check that BTF function arguments match actual types that the + * verifier sees. + */ + for (i = 0; i < nargs; i++) { + t = btf_type_by_id(btf, args[i].type); + while (btf_type_is_modifier(t)) + t = btf_type_by_id(btf, t->type); + if (btf_type_is_int(t) || btf_type_is_enum(t)) { + if (reg[i + 1].type == SCALAR_VALUE) + continue; + bpf_log(log, "R%d is not a scalar\n", i + 1); + goto out; + } + if (btf_type_is_ptr(t)) { + if (reg[i + 1].type == SCALAR_VALUE) { + bpf_log(log, "R%d is not a pointer\n", i + 1); + goto out; + } + /* If program is passing PTR_TO_CTX into subprogram + * check that BTF type matches. + */ + if (reg[i + 1].type == PTR_TO_CTX && + !btf_get_prog_ctx_type(log, btf, t, prog->type)) + goto out; + /* All other pointers are ok */ + continue; + } + bpf_log(log, "Unrecognized argument type %s\n", + btf_kind_str[BTF_INFO_KIND(t->info)]); + goto out; + } + return 0; +out: + /* LLVM optimizations can remove arguments from static functions. */ + bpf_log(log, + "Type info disagrees with actual arguments due to compiler optimizations\n"); + prog->aux->func_info_aux[subprog].unreliable = true; + return 0; +} + void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj, struct seq_file *m) { diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 99693f3c4e99..b5945c3aaa8e 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2015,6 +2015,7 @@ static void bpf_prog_free_deferred(struct work_struct *work) if (aux->prog->has_callchain_buf) put_callchain_buffers(); #endif + bpf_trampoline_put(aux->trampoline); for (i = 0; i < aux->func_cnt; i++) bpf_jit_free(aux->func[i]); if (aux->func_cnt) { @@ -2030,6 +2031,8 @@ void bpf_prog_free(struct bpf_prog *fp) { struct bpf_prog_aux *aux = fp->aux; + if (aux->linked_prog) + bpf_prog_put(aux->linked_prog); INIT_WORK(&aux->work, bpf_prog_free_deferred); schedule_work(&aux->work); } @@ -2144,6 +2147,12 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to, return -EFAULT; } +int __weak bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, + void *addr1, void *addr2) +{ + return -ENOTSUPP; +} + DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key); EXPORT_SYMBOL(bpf_stats_enabled_key); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 6d9ce95e5a8d..c88c815c2154 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -43,7 +43,7 @@ static DEFINE_SPINLOCK(map_idr_lock); int sysctl_unprivileged_bpf_disabled __read_mostly; static const struct bpf_map_ops * const bpf_map_types[] = { -#define BPF_PROG_TYPE(_id, _ops) +#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) #define BPF_MAP_TYPE(_id, _ops) \ [_id] = &_ops, #include <linux/bpf_types.h> @@ -1189,7 +1189,7 @@ err_put: } static const struct bpf_prog_ops * const bpf_prog_types[] = { -#define BPF_PROG_TYPE(_id, _name) \ +#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ [_id] = & _name ## _prog_ops, #define BPF_MAP_TYPE(_id, _ops) #include <linux/bpf_types.h> @@ -1328,6 +1328,7 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu) struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); kvfree(aux->func_info); + kfree(aux->func_info_aux); free_used_maps(aux); bpf_prog_uncharge_memlock(aux->prog); security_bpf_prog_free(aux); @@ -1576,7 +1577,7 @@ static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr) static int bpf_prog_load_check_attach(enum bpf_prog_type prog_type, enum bpf_attach_type expected_attach_type, - u32 btf_id) + u32 btf_id, u32 prog_fd) { switch (prog_type) { case BPF_PROG_TYPE_TRACING: @@ -1584,7 +1585,7 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type, return -EINVAL; break; default: - if (btf_id) + if (btf_id || prog_fd) return -EINVAL; break; } @@ -1635,7 +1636,7 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type, } /* last field in 'union bpf_attr' used by this command */ -#define BPF_PROG_LOAD_LAST_FIELD attach_btf_id +#define BPF_PROG_LOAD_LAST_FIELD attach_prog_fd static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) { @@ -1678,7 +1679,8 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) bpf_prog_load_fixup_attach_type(attr); if (bpf_prog_load_check_attach(type, attr->expected_attach_type, - attr->attach_btf_id)) + attr->attach_btf_id, + attr->attach_prog_fd)) return -EINVAL; /* plain bpf_prog allocation */ @@ -1688,6 +1690,16 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) prog->expected_attach_type = attr->expected_attach_type; prog->aux->attach_btf_id = attr->attach_btf_id; + if (attr->attach_prog_fd) { + struct bpf_prog *tgt_prog; + + tgt_prog = bpf_prog_get(attr->attach_prog_fd); + if (IS_ERR(tgt_prog)) { + err = PTR_ERR(tgt_prog); + goto free_prog_nouncharge; + } + prog->aux->linked_prog = tgt_prog; + } prog->aux->offload_requested = !!attr->prog_ifindex; @@ -1799,6 +1811,49 @@ static int bpf_obj_get(const union bpf_attr *attr) attr->file_flags); } +static int bpf_tracing_prog_release(struct inode *inode, struct file *filp) +{ + struct bpf_prog *prog = filp->private_data; + + WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog)); + bpf_prog_put(prog); + return 0; +} + +static const struct file_operations bpf_tracing_prog_fops = { + .release = bpf_tracing_prog_release, + .read = bpf_dummy_read, + .write = bpf_dummy_write, +}; + +static int bpf_tracing_prog_attach(struct bpf_prog *prog) +{ + int tr_fd, err; + + if (prog->expected_attach_type != BPF_TRACE_FENTRY && + prog->expected_attach_type != BPF_TRACE_FEXIT) { + err = -EINVAL; + goto out_put_prog; + } + + err = bpf_trampoline_link_prog(prog); + if (err) + goto out_put_prog; + + tr_fd = anon_inode_getfd("bpf-tracing-prog", &bpf_tracing_prog_fops, + prog, O_CLOEXEC); + if (tr_fd < 0) { + WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog)); + err = tr_fd; + goto out_put_prog; + } + return tr_fd; + +out_put_prog: + bpf_prog_put(prog); + return err; +} + struct bpf_raw_tracepoint { struct bpf_raw_event_map *btp; struct bpf_prog *prog; @@ -1850,14 +1905,16 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr) if (prog->type == BPF_PROG_TYPE_TRACING) { if (attr->raw_tracepoint.name) { - /* raw_tp name should not be specified in raw_tp - * programs that were verified via in-kernel BTF info + /* The attach point for this category of programs + * should be specified via btf_id during program load. */ err = -EINVAL; goto out_put_prog; } - /* raw_tp name is taken from type name instead */ - tp_name = prog->aux->attach_func_name; + if (prog->expected_attach_type == BPF_TRACE_RAW_TP) + tp_name = prog->aux->attach_func_name; + else + return bpf_tracing_prog_attach(prog); } else { if (strncpy_from_user(buf, u64_to_user_ptr(attr->raw_tracepoint.name), diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c new file mode 100644 index 000000000000..10ae59d65f13 --- /dev/null +++ b/kernel/bpf/trampoline.c @@ -0,0 +1,253 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2019 Facebook */ +#include <linux/hash.h> +#include <linux/bpf.h> +#include <linux/filter.h> + +/* btf_vmlinux has ~22k attachable functions. 1k htab is enough. */ +#define TRAMPOLINE_HASH_BITS 10 +#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS) + +static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE]; + +/* serializes access to trampoline_table */ +static DEFINE_MUTEX(trampoline_mutex); + +struct bpf_trampoline *bpf_trampoline_lookup(u64 key) +{ + struct bpf_trampoline *tr; + struct hlist_head *head; + void *image; + int i; + + mutex_lock(&trampoline_mutex); + head = &trampoline_table[hash_64(key, TRAMPOLINE_HASH_BITS)]; + hlist_for_each_entry(tr, head, hlist) { + if (tr->key == key) { + refcount_inc(&tr->refcnt); + goto out; + } + } + tr = kzalloc(sizeof(*tr), GFP_KERNEL); + if (!tr) + goto out; + + /* is_root was checked earlier. No need for bpf_jit_charge_modmem() */ + image = bpf_jit_alloc_exec(PAGE_SIZE); + if (!image) { + kfree(tr); + tr = NULL; + goto out; + } + + tr->key = key; + INIT_HLIST_NODE(&tr->hlist); + hlist_add_head(&tr->hlist, head); + refcount_set(&tr->refcnt, 1); + mutex_init(&tr->mutex); + for (i = 0; i < BPF_TRAMP_MAX; i++) + INIT_HLIST_HEAD(&tr->progs_hlist[i]); + + set_vm_flush_reset_perms(image); + /* Keep image as writeable. The alternative is to keep flipping ro/rw + * everytime new program is attached or detached. + */ + set_memory_x((long)image, 1); + tr->image = image; +out: + mutex_unlock(&trampoline_mutex); + return tr; +} + +/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 + * bytes on x86. Pick a number to fit into PAGE_SIZE / 2 + */ +#define BPF_MAX_TRAMP_PROGS 40 + +static int bpf_trampoline_update(struct bpf_trampoline *tr) +{ + void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2; + void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2; + struct bpf_prog *progs_to_run[BPF_MAX_TRAMP_PROGS]; + int fentry_cnt = tr->progs_cnt[BPF_TRAMP_FENTRY]; + int fexit_cnt = tr->progs_cnt[BPF_TRAMP_FEXIT]; + struct bpf_prog **progs, **fentry, **fexit; + u32 flags = BPF_TRAMP_F_RESTORE_REGS; + struct bpf_prog_aux *aux; + int err; + + if (fentry_cnt + fexit_cnt == 0) { + err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_CALL_TO_NOP, + old_image, NULL); + tr->selector = 0; + goto out; + } + + /* populate fentry progs */ + fentry = progs = progs_to_run; + hlist_for_each_entry(aux, &tr->progs_hlist[BPF_TRAMP_FENTRY], tramp_hlist) + *progs++ = aux->prog; + + /* populate fexit progs */ + fexit = progs; + hlist_for_each_entry(aux, &tr->progs_hlist[BPF_TRAMP_FEXIT], tramp_hlist) + *progs++ = aux->prog; + + if (fexit_cnt) + flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME; + + err = arch_prepare_bpf_trampoline(new_image, &tr->func.model, flags, + fentry, fentry_cnt, + fexit, fexit_cnt, + tr->func.addr); + if (err) + goto out; + + if (tr->selector) + /* progs already running at this address */ + err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_CALL_TO_CALL, + old_image, new_image); + else + /* first time registering */ + err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_NOP_TO_CALL, + NULL, new_image); + if (err) + goto out; + tr->selector++; +out: + return err; +} + +static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(enum bpf_attach_type t) +{ + switch (t) { + case BPF_TRACE_FENTRY: + return BPF_TRAMP_FENTRY; + default: + return BPF_TRAMP_FEXIT; + } +} + +int bpf_trampoline_link_prog(struct bpf_prog *prog) +{ + enum bpf_tramp_prog_type kind; + struct bpf_trampoline *tr; + int err = 0; + + tr = prog->aux->trampoline; + kind = bpf_attach_type_to_tramp(prog->expected_attach_type); + mutex_lock(&tr->mutex); + if (tr->progs_cnt[BPF_TRAMP_FENTRY] + tr->progs_cnt[BPF_TRAMP_FEXIT] + >= BPF_MAX_TRAMP_PROGS) { + err = -E2BIG; + goto out; + } + if (!hlist_unhashed(&prog->aux->tramp_hlist)) { + /* prog already linked */ + err = -EBUSY; + goto out; + } + hlist_add_head(&prog->aux->tramp_hlist, &tr->progs_hlist[kind]); + tr->progs_cnt[kind]++; + err = bpf_trampoline_update(prog->aux->trampoline); + if (err) { + hlist_del(&prog->aux->tramp_hlist); + tr->progs_cnt[kind]--; + } +out: + mutex_unlock(&tr->mutex); + return err; +} + +/* bpf_trampoline_unlink_prog() should never fail. */ +int bpf_trampoline_unlink_prog(struct bpf_prog *prog) +{ + enum bpf_tramp_prog_type kind; + struct bpf_trampoline *tr; + int err; + + tr = prog->aux->trampoline; + kind = bpf_attach_type_to_tramp(prog->expected_attach_type); + mutex_lock(&tr->mutex); + hlist_del(&prog->aux->tramp_hlist); + tr->progs_cnt[kind]--; + err = bpf_trampoline_update(prog->aux->trampoline); + mutex_unlock(&tr->mutex); + return err; +} + +void bpf_trampoline_put(struct bpf_trampoline *tr) +{ + if (!tr) + return; + mutex_lock(&trampoline_mutex); + if (!refcount_dec_and_test(&tr->refcnt)) + goto out; + WARN_ON_ONCE(mutex_is_locked(&tr->mutex)); + if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FENTRY]))) + goto out; + if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT]))) + goto out; + bpf_jit_free_exec(tr->image); + hlist_del(&tr->hlist); + kfree(tr); +out: + mutex_unlock(&trampoline_mutex); +} + +/* The logic is similar to BPF_PROG_RUN, but with explicit rcu and preempt that + * are needed for trampoline. The macro is split into + * call _bpf_prog_enter + * call prog->bpf_func + * call __bpf_prog_exit + */ +u64 notrace __bpf_prog_enter(void) +{ + u64 start = 0; + + rcu_read_lock(); + preempt_disable(); + if (static_branch_unlikely(&bpf_stats_enabled_key)) + start = sched_clock(); + return start; +} + +void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start) +{ + struct bpf_prog_stats *stats; + + if (static_branch_unlikely(&bpf_stats_enabled_key) && + /* static_key could be enabled in __bpf_prog_enter + * and disabled in __bpf_prog_exit. + * And vice versa. + * Hence check that 'start' is not zero. + */ + start) { + stats = this_cpu_ptr(prog->aux->stats); + u64_stats_update_begin(&stats->syncp); + stats->cnt++; + stats->nsecs += sched_clock() - start; + u64_stats_update_end(&stats->syncp); + } + preempt_enable(); + rcu_read_unlock(); +} + +int __weak +arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags, + struct bpf_prog **fentry_progs, int fentry_cnt, + struct bpf_prog **fexit_progs, int fexit_cnt, + void *orig_call) +{ + return -ENOTSUPP; +} + +static int __init init_trampolines(void) +{ + int i; + + for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++) + INIT_HLIST_HEAD(&trampoline_table[i]); + return 0; +} +late_initcall(init_trampolines); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2f2374967b36..e9dc95a18d44 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -23,7 +23,7 @@ #include "disasm.h" static const struct bpf_verifier_ops * const bpf_verifier_ops[] = { -#define BPF_PROG_TYPE(_id, _name) \ +#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ [_id] = & _name ## _verifier_ops, #define BPF_MAP_TYPE(_id, _ops) #include <linux/bpf_types.h> @@ -3970,6 +3970,9 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, /* only increment it after check_reg_arg() finished */ state->curframe++; + if (btf_check_func_arg_match(env, subprog)) + return -EINVAL; + /* and go analyze first insn of the callee */ *insn_idx = target_insn; @@ -4147,11 +4150,9 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn meta.func_id = func_id; /* check args */ for (i = 0; i < 5; i++) { - if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID) { - if (!fn->btf_id[i]) - fn->btf_id[i] = btf_resolve_helper_id(&env->log, fn->func, i); - meta.btf_id = fn->btf_id[i]; - } + err = btf_resolve_helper_id(&env->log, fn, i); + if (err > 0) + meta.btf_id = err; err = check_func_arg(env, BPF_REG_1 + i, fn->arg_type[i], &meta); if (err) return err; @@ -6566,6 +6567,7 @@ static int check_btf_func(struct bpf_verifier_env *env, u32 i, nfuncs, urec_size, min_size; u32 krec_size = sizeof(struct bpf_func_info); struct bpf_func_info *krecord; + struct bpf_func_info_aux *info_aux = NULL; const struct btf_type *type; struct bpf_prog *prog; const struct btf *btf; @@ -6599,6 +6601,9 @@ static int check_btf_func(struct bpf_verifier_env *env, krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN); if (!krecord) return -ENOMEM; + info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN); + if (!info_aux) + goto err_free; for (i = 0; i < nfuncs; i++) { ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size); @@ -6650,29 +6655,31 @@ static int check_btf_func(struct bpf_verifier_env *env, ret = -EINVAL; goto err_free; } - prev_offset = krecord[i].insn_off; urecord += urec_size; } prog->aux->func_info = krecord; prog->aux->func_info_cnt = nfuncs; + prog->aux->func_info_aux = info_aux; return 0; err_free: kvfree(krecord); + kfree(info_aux); return ret; } static void adjust_btf_func(struct bpf_verifier_env *env) { + struct bpf_prog_aux *aux = env->prog->aux; int i; - if (!env->prog->aux->func_info) + if (!aux->func_info) return; for (i = 0; i < env->subprog_cnt; i++) - env->prog->aux->func_info[i].insn_off = env->subprog_info[i].start; + aux->func_info[i].insn_off = env->subprog_info[i].start; } #define MIN_BPF_LINEINFO_SIZE (offsetof(struct bpf_line_info, line_col) + \ @@ -7653,6 +7660,9 @@ static int do_check(struct bpf_verifier_env *env) 0 /* frameno */, 0 /* subprogno, zero == main subprog */); + if (btf_check_func_arg_match(env, 0)) + return -EINVAL; + for (;;) { struct bpf_insn *insn; u8 class; @@ -9380,10 +9390,17 @@ static void print_verification_stats(struct bpf_verifier_env *env) static int check_attach_btf_id(struct bpf_verifier_env *env) { struct bpf_prog *prog = env->prog; + struct bpf_prog *tgt_prog = prog->aux->linked_prog; u32 btf_id = prog->aux->attach_btf_id; const char prefix[] = "btf_trace_"; + int ret = 0, subprog = -1, i; + struct bpf_trampoline *tr; const struct btf_type *t; + bool conservative = true; const char *tname; + struct btf *btf; + long addr; + u64 key; if (prog->type != BPF_PROG_TYPE_TRACING) return 0; @@ -9392,19 +9409,47 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) verbose(env, "Tracing programs must provide btf_id\n"); return -EINVAL; } - t = btf_type_by_id(btf_vmlinux, btf_id); + btf = bpf_prog_get_target_btf(prog); + if (!btf) { + verbose(env, + "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n"); + return -EINVAL; + } + t = btf_type_by_id(btf, btf_id); if (!t) { verbose(env, "attach_btf_id %u is invalid\n", btf_id); return -EINVAL; } - tname = btf_name_by_offset(btf_vmlinux, t->name_off); + tname = btf_name_by_offset(btf, t->name_off); if (!tname) { verbose(env, "attach_btf_id %u doesn't have a name\n", btf_id); return -EINVAL; } + if (tgt_prog) { + struct bpf_prog_aux *aux = tgt_prog->aux; + + for (i = 0; i < aux->func_info_cnt; i++) + if (aux->func_info[i].type_id == btf_id) { + subprog = i; + break; + } + if (subprog == -1) { + verbose(env, "Subprog %s doesn't exist\n", tname); + return -EINVAL; + } + conservative = aux->func_info_aux[subprog].unreliable; + key = ((u64)aux->id) << 32 | btf_id; + } else { + key = btf_id; + } switch (prog->expected_attach_type) { case BPF_TRACE_RAW_TP: + if (tgt_prog) { + verbose(env, + "Only FENTRY/FEXIT progs are attachable to another BPF prog\n"); + return -EINVAL; + } if (!btf_type_is_typedef(t)) { verbose(env, "attach_btf_id %u is not a typedef\n", btf_id); @@ -9416,11 +9461,11 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) return -EINVAL; } tname += sizeof(prefix) - 1; - t = btf_type_by_id(btf_vmlinux, t->type); + t = btf_type_by_id(btf, t->type); if (!btf_type_is_ptr(t)) /* should never happen in valid vmlinux build */ return -EINVAL; - t = btf_type_by_id(btf_vmlinux, t->type); + t = btf_type_by_id(btf, t->type); if (!btf_type_is_func_proto(t)) /* should never happen in valid vmlinux build */ return -EINVAL; @@ -9432,6 +9477,66 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) prog->aux->attach_func_proto = t; prog->aux->attach_btf_trace = true; return 0; + case BPF_TRACE_FENTRY: + case BPF_TRACE_FEXIT: + if (!btf_type_is_func(t)) { + verbose(env, "attach_btf_id %u is not a function\n", + btf_id); + return -EINVAL; + } + t = btf_type_by_id(btf, t->type); + if (!btf_type_is_func_proto(t)) + return -EINVAL; + tr = bpf_trampoline_lookup(key); + if (!tr) + return -ENOMEM; + prog->aux->attach_func_name = tname; + /* t is either vmlinux type or another program's type */ + prog->aux->attach_func_proto = t; + mutex_lock(&tr->mutex); + if (tr->func.addr) { + prog->aux->trampoline = tr; + goto out; + } + if (tgt_prog && conservative) { + prog->aux->attach_func_proto = NULL; + t = NULL; + } + ret = btf_distill_func_proto(&env->log, btf, t, + tname, &tr->func.model); + if (ret < 0) + goto out; + if (tgt_prog) { + if (!tgt_prog->jited) { + /* for now */ + verbose(env, "Can trace only JITed BPF progs\n"); + ret = -EINVAL; + goto out; + } + if (tgt_prog->type == BPF_PROG_TYPE_TRACING) { + /* prevent cycles */ + verbose(env, "Cannot recursively attach\n"); + ret = -EINVAL; + goto out; + } + addr = (long) tgt_prog->aux->func[subprog]->bpf_func; + } else { + addr = kallsyms_lookup_name(tname); + if (!addr) { + verbose(env, + "The address of function %s cannot be found\n", + tname); + ret = -ENOENT; + goto out; + } + } + tr->func.addr = (void *)addr; + prog->aux->trampoline = tr; +out: + mutex_unlock(&tr->mutex); + if (ret) + bpf_trampoline_put(tr); + return ret; default: return -EINVAL; } diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 0be4497cb832..62933279fbba 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -105,6 +105,40 @@ out: return err; } +/* Integer types of various sizes and pointer combinations cover variety of + * architecture dependent calling conventions. 7+ can be supported in the + * future. + */ +int noinline bpf_fentry_test1(int a) +{ + return a + 1; +} + +int noinline bpf_fentry_test2(int a, u64 b) +{ + return a + b; +} + +int noinline bpf_fentry_test3(char a, int b, u64 c) +{ + return a + b + c; +} + +int noinline bpf_fentry_test4(void *a, char b, int c, u64 d) +{ + return (long)a + b + c + d; +} + +int noinline bpf_fentry_test5(u64 a, void *b, short c, int d, u64 e) +{ + return a + (long)b + c + d + e; +} + +int noinline bpf_fentry_test6(u64 a, void *b, short c, int d, void *e, u64 f) +{ + return a + (long)b + c + d + (long)e + f; +} + static void *bpf_test_init(const union bpf_attr *kattr, u32 size, u32 headroom, u32 tailroom) { @@ -122,6 +156,13 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 size, kfree(data); return ERR_PTR(-EFAULT); } + if (bpf_fentry_test1(1) != 2 || + bpf_fentry_test2(2, 3) != 5 || + bpf_fentry_test3(4, 5, 6) != 15 || + bpf_fentry_test4((void *)7, 8, 9, 10) != 34 || + bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 || + bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111) + return ERR_PTR(-EFAULT); return data; } diff --git a/net/core/filter.c b/net/core/filter.c index fc303abec8fa..49ded4a7588a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3816,7 +3816,7 @@ static const struct bpf_func_proto bpf_skb_event_output_proto = { .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; -static u32 bpf_skb_output_btf_ids[5]; +static int bpf_skb_output_btf_ids[5]; const struct bpf_func_proto bpf_skb_output_proto = { .func = bpf_skb_event_output, .gpl_only = true, @@ -8684,16 +8684,6 @@ out: } #ifdef CONFIG_INET -struct sk_reuseport_kern { - struct sk_buff *skb; - struct sock *sk; - struct sock *selected_sk; - void *data_end; - u32 hash; - u32 reuseport_id; - bool bind_inany; -}; - static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern, struct sock_reuseport *reuse, struct sock *sk, struct sk_buff *skb, diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index df6809a76404..4842a134b202 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -201,6 +201,8 @@ enum bpf_attach_type { BPF_CGROUP_GETSOCKOPT, BPF_CGROUP_SETSOCKOPT, BPF_TRACE_RAW_TP, + BPF_TRACE_FENTRY, + BPF_TRACE_FEXIT, __MAX_BPF_ATTACH_TYPE }; @@ -423,6 +425,7 @@ union bpf_attr { __aligned_u64 line_info; /* line info */ __u32 line_info_cnt; /* number of bpf_line_info records */ __u32 attach_btf_id; /* in-kernel BTF type id to attach to */ + __u32 attach_prog_fd; /* 0 to attach to vmlinux */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index b3e3e99a0f28..98596e15390f 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -228,10 +228,13 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, memset(&attr, 0, sizeof(attr)); attr.prog_type = load_attr->prog_type; attr.expected_attach_type = load_attr->expected_attach_type; - if (attr.prog_type == BPF_PROG_TYPE_TRACING) + if (attr.prog_type == BPF_PROG_TYPE_TRACING) { attr.attach_btf_id = load_attr->attach_btf_id; - else + attr.attach_prog_fd = load_attr->attach_prog_fd; + } else { attr.prog_ifindex = load_attr->prog_ifindex; + attr.kern_version = load_attr->kern_version; + } attr.insn_cnt = (__u32)load_attr->insns_cnt; attr.insns = ptr_to_u64(load_attr->insns); attr.license = ptr_to_u64(load_attr->license); @@ -245,7 +248,6 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, attr.log_size = 0; } - attr.kern_version = load_attr->kern_version; attr.prog_btf_fd = load_attr->prog_btf_fd; attr.func_info_rec_size = load_attr->func_info_rec_size; attr.func_info_cnt = load_attr->func_info_cnt; diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 1c53bc5b4b3c..3c791fa8e68e 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -77,7 +77,10 @@ struct bpf_load_program_attr { const struct bpf_insn *insns; size_t insns_cnt; const char *license; - __u32 kern_version; + union { + __u32 kern_version; + __u32 attach_prog_fd; + }; union { __u32 prog_ifindex; __u32 attach_btf_id; diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 0c7d28292898..c63ab1add126 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -44,4 +44,17 @@ enum libbpf_pin_type { LIBBPF_PIN_BY_NAME, }; +/* The following types should be used by BPF_PROG_TYPE_TRACING program to + * access kernel function arguments. BPF trampoline and raw tracepoints + * typecast arguments to 'unsigned long long'. + */ +typedef int __attribute__((aligned(8))) ks32; +typedef char __attribute__((aligned(8))) ks8; +typedef short __attribute__((aligned(8))) ks16; +typedef long long __attribute__((aligned(8))) ks64; +typedef unsigned int __attribute__((aligned(8))) ku32; +typedef unsigned char __attribute__((aligned(8))) ku8; +typedef unsigned short __attribute__((aligned(8))) ku16; +typedef unsigned long long __attribute__((aligned(8))) ku64; + #endif diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 86a1847e4a9f..88efa2bb7137 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -316,6 +316,28 @@ __s32 btf__find_by_name(const struct btf *btf, const char *type_name) return -ENOENT; } +__s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, + __u32 kind) +{ + __u32 i; + + if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void")) + return 0; + + for (i = 1; i <= btf->nr_types; i++) { + const struct btf_type *t = btf->types[i]; + const char *name; + + if (btf_kind(t) != kind) + continue; + name = btf__name_by_offset(btf, t->name_off); + if (name && !strcmp(type_name, name)) + return i; + } + + return -ENOENT; +} + void btf__free(struct btf *btf) { if (!btf) diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index b18994116a44..d9ac73a02cde 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -72,6 +72,8 @@ LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf); LIBBPF_API int btf__load(struct btf *btf); LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, const char *type_name); +LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf, + const char *type_name, __u32 kind); LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf); LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 id); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 96ef18cfeffb..7132c6bdec02 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -189,6 +189,7 @@ struct bpf_program { enum bpf_attach_type expected_attach_type; __u32 attach_btf_id; + __u32 attach_prog_fd; void *func_info; __u32 func_info_rec_size; __u32 func_info_cnt; @@ -3683,8 +3684,13 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, load_attr.insns = insns; load_attr.insns_cnt = insns_cnt; load_attr.license = license; - load_attr.kern_version = kern_version; - load_attr.prog_ifindex = prog->prog_ifindex; + if (prog->type == BPF_PROG_TYPE_TRACING) { + load_attr.attach_prog_fd = prog->attach_prog_fd; + load_attr.attach_btf_id = prog->attach_btf_id; + } else { + load_attr.kern_version = kern_version; + load_attr.prog_ifindex = prog->prog_ifindex; + } /* if .BTF.ext was loaded, kernel supports associated BTF for prog */ if (prog->obj->btf_ext) btf_fd = bpf_object__btf_fd(prog->obj); @@ -3699,7 +3705,6 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, load_attr.line_info_cnt = prog->line_info_cnt; load_attr.log_level = prog->log_level; load_attr.prog_flags = prog->prog_flags; - load_attr.attach_btf_id = prog->attach_btf_id; retry_load: log_buf = malloc(log_buf_size); @@ -3856,8 +3861,9 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) return 0; } -static int libbpf_attach_btf_id_by_name(const char *name, __u32 *btf_id); - +static int libbpf_find_attach_btf_id(const char *name, + enum bpf_attach_type attach_type, + __u32 attach_prog_fd); static struct bpf_object * __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, struct bpf_object_open_opts *opts) @@ -3868,6 +3874,7 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, const char *obj_name; char tmp_name[64]; bool relaxed_maps; + __u32 attach_prog_fd; int err; if (elf_version(EV_CURRENT) == EV_NONE) { @@ -3898,6 +3905,7 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, obj->relaxed_core_relocs = OPTS_GET(opts, relaxed_core_relocs, false); relaxed_maps = OPTS_GET(opts, relaxed_maps, false); pin_root_path = OPTS_GET(opts, pin_root_path, NULL); + attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0); CHECK_ERR(bpf_object__elf_init(obj), err, out); CHECK_ERR(bpf_object__check_endianness(obj), err, out); @@ -3910,7 +3918,6 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, bpf_object__for_each_program(prog, obj) { enum bpf_prog_type prog_type; enum bpf_attach_type attach_type; - __u32 btf_id; err = libbpf_prog_type_by_name(prog->section_name, &prog_type, &attach_type); @@ -3923,10 +3930,13 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, bpf_program__set_type(prog, prog_type); bpf_program__set_expected_attach_type(prog, attach_type); if (prog_type == BPF_PROG_TYPE_TRACING) { - err = libbpf_attach_btf_id_by_name(prog->section_name, &btf_id); - if (err) + err = libbpf_find_attach_btf_id(prog->section_name, + attach_type, + attach_prog_fd); + if (err <= 0) goto out; - prog->attach_btf_id = btf_id; + prog->attach_btf_id = err; + prog->attach_prog_fd = attach_prog_fd; } } @@ -4935,6 +4945,10 @@ static const struct { BPF_PROG_SEC("raw_tp/", BPF_PROG_TYPE_RAW_TRACEPOINT), BPF_PROG_BTF("tp_btf/", BPF_PROG_TYPE_TRACING, BPF_TRACE_RAW_TP), + BPF_PROG_BTF("fentry/", BPF_PROG_TYPE_TRACING, + BPF_TRACE_FENTRY), + BPF_PROG_BTF("fexit/", BPF_PROG_TYPE_TRACING, + BPF_TRACE_FEXIT), BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP), BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN), @@ -5052,43 +5066,94 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, } #define BTF_PREFIX "btf_trace_" -static int libbpf_attach_btf_id_by_name(const char *name, __u32 *btf_id) +int libbpf_find_vmlinux_btf_id(const char *name, + enum bpf_attach_type attach_type) { struct btf *btf = bpf_core_find_kernel_btf(); - char raw_tp_btf_name[128] = BTF_PREFIX; - char *dst = raw_tp_btf_name + sizeof(BTF_PREFIX) - 1; - int ret, i, err = -EINVAL; + char raw_tp_btf[128] = BTF_PREFIX; + char *dst = raw_tp_btf + sizeof(BTF_PREFIX) - 1; + const char *btf_name; + int err = -EINVAL; + u32 kind; if (IS_ERR(btf)) { pr_warn("vmlinux BTF is not found\n"); return -EINVAL; } - if (!name) + if (attach_type == BPF_TRACE_RAW_TP) { + /* prepend "btf_trace_" prefix per kernel convention */ + strncat(dst, name, sizeof(raw_tp_btf) - sizeof(BTF_PREFIX)); + btf_name = raw_tp_btf; + kind = BTF_KIND_TYPEDEF; + } else { + btf_name = name; + kind = BTF_KIND_FUNC; + } + err = btf__find_by_name_kind(btf, btf_name, kind); + btf__free(btf); + return err; +} + +static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) +{ + struct bpf_prog_info_linear *info_linear; + struct bpf_prog_info *info; + struct btf *btf = NULL; + int err = -EINVAL; + + info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0); + if (IS_ERR_OR_NULL(info_linear)) { + pr_warn("failed get_prog_info_linear for FD %d\n", + attach_prog_fd); + return -EINVAL; + } + info = &info_linear->info; + if (!info->btf_id) { + pr_warn("The target program doesn't have BTF\n"); + goto out; + } + if (btf__get_from_id(info->btf_id, &btf)) { + pr_warn("Failed to get BTF of the program\n"); goto out; + } + err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC); + btf__free(btf); + if (err <= 0) { + pr_warn("%s is not found in prog's BTF\n", name); + goto out; + } +out: + free(info_linear); + return err; +} + +static int libbpf_find_attach_btf_id(const char *name, + enum bpf_attach_type attach_type, + __u32 attach_prog_fd) +{ + int i, err; + + if (!name) + return -EINVAL; for (i = 0; i < ARRAY_SIZE(section_names); i++) { if (!section_names[i].is_attach_btf) continue; if (strncmp(name, section_names[i].sec, section_names[i].len)) continue; - /* prepend "btf_trace_" prefix per kernel convention */ - strncat(dst, name + section_names[i].len, - sizeof(raw_tp_btf_name) - sizeof(BTF_PREFIX)); - ret = btf__find_by_name(btf, raw_tp_btf_name); - if (ret <= 0) { - pr_warn("%s is not found in vmlinux BTF\n", dst); - goto out; - } - *btf_id = ret; - err = 0; - goto out; + if (attach_prog_fd) + err = libbpf_find_prog_btf_id(name + section_names[i].len, + attach_prog_fd); + else + err = libbpf_find_vmlinux_btf_id(name + section_names[i].len, + attach_type); + if (err <= 0) + pr_warn("%s is not found in vmlinux BTF\n", name); + return err; } pr_warn("failed to identify btf_id based on ELF section name '%s'\n", name); - err = -ESRCH; -out: - btf__free(btf); - return err; + return -ESRCH; } int libbpf_attach_type_by_name(const char *name, @@ -5716,6 +5781,37 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, return (struct bpf_link *)link; } +struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog) +{ + char errmsg[STRERR_BUFSIZE]; + struct bpf_link_fd *link; + int prog_fd, pfd; + + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) { + pr_warn("program '%s': can't attach before loaded\n", + bpf_program__title(prog, false)); + return ERR_PTR(-EINVAL); + } + + link = malloc(sizeof(*link)); + if (!link) + return ERR_PTR(-ENOMEM); + link->link.destroy = &bpf_link__destroy_fd; + + pfd = bpf_raw_tracepoint_open(NULL, prog_fd); + if (pfd < 0) { + pfd = -errno; + free(link); + pr_warn("program '%s': failed to attach to trace: %s\n", + bpf_program__title(prog, false), + libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + return ERR_PTR(pfd); + } + link->fd = pfd; + return (struct bpf_link *)link; +} + enum bpf_perf_event_ret bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, void **copy_mem, size_t *copy_size, diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 5aa27caad6c2..0dbf4bfba0c4 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -108,8 +108,9 @@ struct bpf_object_open_opts { * auto-pinned to that path on load; defaults to "/sys/fs/bpf". */ const char *pin_root_path; + __u32 attach_prog_fd; }; -#define bpf_object_open_opts__last_field pin_root_path +#define bpf_object_open_opts__last_field attach_prog_fd LIBBPF_API struct bpf_object *bpf_object__open(const char *path); LIBBPF_API struct bpf_object * @@ -188,6 +189,8 @@ libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, enum bpf_attach_type *expected_attach_type); LIBBPF_API int libbpf_attach_type_by_name(const char *name, enum bpf_attach_type *attach_type); +LIBBPF_API int libbpf_find_vmlinux_btf_id(const char *name, + enum bpf_attach_type attach_type); /* Accessors of bpf_program */ struct bpf_program; @@ -251,6 +254,8 @@ LIBBPF_API struct bpf_link * bpf_program__attach_raw_tracepoint(struct bpf_program *prog, const char *tp_name); +LIBBPF_API struct bpf_link * +bpf_program__attach_trace(struct bpf_program *prog); struct bpf_insn; /* diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 9f39ee06b2d4..8ddc2c40e482 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -199,9 +199,12 @@ LIBBPF_0.0.6 { bpf_map__set_pin_path; bpf_object__open_file; bpf_object__open_mem; + bpf_program__attach_trace; bpf_program__get_expected_attach_type; bpf_program__get_type; bpf_program__is_tracing; bpf_program__set_tracing; bpf_program__size; + btf__find_by_name_kind; + libbpf_find_vmlinux_btf_id; } LIBBPF_0.0.5; diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c new file mode 100644 index 000000000000..40bcff2cc274 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ +#include <test_progs.h> + +void test_fentry_fexit(void) +{ + struct bpf_prog_load_attr attr_fentry = { + .file = "./fentry_test.o", + }; + struct bpf_prog_load_attr attr_fexit = { + .file = "./fexit_test.o", + }; + + struct bpf_object *obj_fentry = NULL, *obj_fexit = NULL, *pkt_obj; + struct bpf_map *data_map_fentry, *data_map_fexit; + char fentry_name[] = "fentry/bpf_fentry_testX"; + char fexit_name[] = "fexit/bpf_fentry_testX"; + int err, pkt_fd, kfree_skb_fd, i; + struct bpf_link *link[12] = {}; + struct bpf_program *prog[12]; + __u32 duration, retval; + const int zero = 0; + u64 result[12]; + + err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS, + &pkt_obj, &pkt_fd); + if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno)) + return; + err = bpf_prog_load_xattr(&attr_fentry, &obj_fentry, &kfree_skb_fd); + if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno)) + goto close_prog; + err = bpf_prog_load_xattr(&attr_fexit, &obj_fexit, &kfree_skb_fd); + if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno)) + goto close_prog; + + for (i = 0; i < 6; i++) { + fentry_name[sizeof(fentry_name) - 2] = '1' + i; + prog[i] = bpf_object__find_program_by_title(obj_fentry, fentry_name); + if (CHECK(!prog[i], "find_prog", "prog %s not found\n", fentry_name)) + goto close_prog; + link[i] = bpf_program__attach_trace(prog[i]); + if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n")) + goto close_prog; + } + data_map_fentry = bpf_object__find_map_by_name(obj_fentry, "fentry_t.bss"); + if (CHECK(!data_map_fentry, "find_data_map", "data map not found\n")) + goto close_prog; + + for (i = 6; i < 12; i++) { + fexit_name[sizeof(fexit_name) - 2] = '1' + i - 6; + prog[i] = bpf_object__find_program_by_title(obj_fexit, fexit_name); + if (CHECK(!prog[i], "find_prog", "prog %s not found\n", fexit_name)) + goto close_prog; + link[i] = bpf_program__attach_trace(prog[i]); + if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n")) + goto close_prog; + } + data_map_fexit = bpf_object__find_map_by_name(obj_fexit, "fexit_te.bss"); + if (CHECK(!data_map_fexit, "find_data_map", "data map not found\n")) + goto close_prog; + + err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6), + NULL, NULL, &retval, &duration); + CHECK(err || retval, "ipv6", + "err %d errno %d retval %d duration %d\n", + err, errno, retval, duration); + + err = bpf_map_lookup_elem(bpf_map__fd(data_map_fentry), &zero, &result); + if (CHECK(err, "get_result", + "failed to get output data: %d\n", err)) + goto close_prog; + + err = bpf_map_lookup_elem(bpf_map__fd(data_map_fexit), &zero, result + 6); + if (CHECK(err, "get_result", + "failed to get output data: %d\n", err)) + goto close_prog; + + for (i = 0; i < 12; i++) + if (CHECK(result[i] != 1, "result", "bpf_fentry_test%d failed err %ld\n", + i % 6 + 1, result[i])) + goto close_prog; + +close_prog: + for (i = 0; i < 12; i++) + if (!IS_ERR_OR_NULL(link[i])) + bpf_link__destroy(link[i]); + bpf_object__close(obj_fentry); + bpf_object__close(obj_fexit); + bpf_object__close(pkt_obj); +} diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c new file mode 100644 index 000000000000..9fb103193878 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ +#include <test_progs.h> + +void test_fentry_test(void) +{ + struct bpf_prog_load_attr attr = { + .file = "./fentry_test.o", + }; + + char prog_name[] = "fentry/bpf_fentry_testX"; + struct bpf_object *obj = NULL, *pkt_obj; + int err, pkt_fd, kfree_skb_fd, i; + struct bpf_link *link[6] = {}; + struct bpf_program *prog[6]; + __u32 duration, retval; + struct bpf_map *data_map; + const int zero = 0; + u64 result[6]; + + err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS, + &pkt_obj, &pkt_fd); + if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno)) + return; + err = bpf_prog_load_xattr(&attr, &obj, &kfree_skb_fd); + if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno)) + goto close_prog; + + for (i = 0; i < 6; i++) { + prog_name[sizeof(prog_name) - 2] = '1' + i; + prog[i] = bpf_object__find_program_by_title(obj, prog_name); + if (CHECK(!prog[i], "find_prog", "prog %s not found\n", prog_name)) + goto close_prog; + link[i] = bpf_program__attach_trace(prog[i]); + if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n")) + goto close_prog; + } + data_map = bpf_object__find_map_by_name(obj, "fentry_t.bss"); + if (CHECK(!data_map, "find_data_map", "data map not found\n")) + goto close_prog; + + err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6), + NULL, NULL, &retval, &duration); + CHECK(err || retval, "ipv6", + "err %d errno %d retval %d duration %d\n", + err, errno, retval, duration); + + err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, &result); + if (CHECK(err, "get_result", + "failed to get output data: %d\n", err)) + goto close_prog; + + for (i = 0; i < 6; i++) + if (CHECK(result[i] != 1, "result", "bpf_fentry_test%d failed err %ld\n", + i + 1, result[i])) + goto close_prog; + +close_prog: + for (i = 0; i < 6; i++) + if (!IS_ERR_OR_NULL(link[i])) + bpf_link__destroy(link[i]); + bpf_object__close(obj); + bpf_object__close(pkt_obj); +} diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c new file mode 100644 index 000000000000..15c7378362dd --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ +#include <test_progs.h> + +#define PROG_CNT 3 + +void test_fexit_bpf2bpf(void) +{ + const char *prog_name[PROG_CNT] = { + "fexit/test_pkt_access", + "fexit/test_pkt_access_subprog1", + "fexit/test_pkt_access_subprog2", + }; + struct bpf_object *obj = NULL, *pkt_obj; + int err, pkt_fd, i; + struct bpf_link *link[PROG_CNT] = {}; + struct bpf_program *prog[PROG_CNT]; + __u32 duration, retval; + struct bpf_map *data_map; + const int zero = 0; + u64 result[PROG_CNT]; + + err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_UNSPEC, + &pkt_obj, &pkt_fd); + if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno)) + return; + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, + .attach_prog_fd = pkt_fd, + ); + + obj = bpf_object__open_file("./fexit_bpf2bpf.o", &opts); + if (CHECK(IS_ERR_OR_NULL(obj), "obj_open", + "failed to open fexit_bpf2bpf: %ld\n", + PTR_ERR(obj))) + goto close_prog; + + err = bpf_object__load(obj); + if (CHECK(err, "obj_load", "err %d\n", err)) + goto close_prog; + + for (i = 0; i < PROG_CNT; i++) { + prog[i] = bpf_object__find_program_by_title(obj, prog_name[i]); + if (CHECK(!prog[i], "find_prog", "prog %s not found\n", prog_name[i])) + goto close_prog; + link[i] = bpf_program__attach_trace(prog[i]); + if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n")) + goto close_prog; + } + data_map = bpf_object__find_map_by_name(obj, "fexit_bp.bss"); + if (CHECK(!data_map, "find_data_map", "data map not found\n")) + goto close_prog; + + err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6), + NULL, NULL, &retval, &duration); + CHECK(err || retval, "ipv6", + "err %d errno %d retval %d duration %d\n", + err, errno, retval, duration); + + err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, &result); + if (CHECK(err, "get_result", + "failed to get output data: %d\n", err)) + goto close_prog; + + for (i = 0; i < PROG_CNT; i++) + if (CHECK(result[i] != 1, "result", "fexit_bpf2bpf failed err %ld\n", + result[i])) + goto close_prog; + +close_prog: + for (i = 0; i < PROG_CNT; i++) + if (!IS_ERR_OR_NULL(link[i])) + bpf_link__destroy(link[i]); + if (!IS_ERR_OR_NULL(obj)) + bpf_object__close(obj); + bpf_object__close(pkt_obj); +} diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c new file mode 100644 index 000000000000..3b9dbf7433f0 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ +#include <test_progs.h> + +/* x86-64 fits 55 JITed and 43 interpreted progs into half page */ +#define CNT 40 + +void test_fexit_stress(void) +{ + char test_skb[128] = {}; + int fexit_fd[CNT] = {}; + int link_fd[CNT] = {}; + __u32 duration = 0; + char error[4096]; + __u32 prog_ret; + int err, i, filter_fd; + + const struct bpf_insn trace_program[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + + struct bpf_load_program_attr load_attr = { + .prog_type = BPF_PROG_TYPE_TRACING, + .license = "GPL", + .insns = trace_program, + .insns_cnt = sizeof(trace_program) / sizeof(struct bpf_insn), + .expected_attach_type = BPF_TRACE_FEXIT, + }; + + const struct bpf_insn skb_program[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + + struct bpf_load_program_attr skb_load_attr = { + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .license = "GPL", + .insns = skb_program, + .insns_cnt = sizeof(skb_program) / sizeof(struct bpf_insn), + }; + + err = libbpf_find_vmlinux_btf_id("bpf_fentry_test1", + load_attr.expected_attach_type); + if (CHECK(err <= 0, "find_vmlinux_btf_id", "failed: %d\n", err)) + goto out; + load_attr.attach_btf_id = err; + + for (i = 0; i < CNT; i++) { + fexit_fd[i] = bpf_load_program_xattr(&load_attr, error, sizeof(error)); + if (CHECK(fexit_fd[i] < 0, "fexit loaded", + "failed: %d errno %d\n", fexit_fd[i], errno)) + goto out; + link_fd[i] = bpf_raw_tracepoint_open(NULL, fexit_fd[i]); + if (CHECK(link_fd[i] < 0, "fexit attach failed", + "prog %d failed: %d err %d\n", i, link_fd[i], errno)) + goto out; + } + + filter_fd = bpf_load_program_xattr(&skb_load_attr, error, sizeof(error)); + if (CHECK(filter_fd < 0, "test_program_loaded", "failed: %d errno %d\n", + filter_fd, errno)) + goto out; + + err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0, + 0, &prog_ret, 0); + close(filter_fd); + CHECK_FAIL(err); +out: + for (i = 0; i < CNT; i++) { + if (link_fd[i]) + close(link_fd[i]); + if (fexit_fd[i]) + close(fexit_fd[i]); + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c new file mode 100644 index 000000000000..f99013222c74 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ +#include <test_progs.h> + +void test_fexit_test(void) +{ + struct bpf_prog_load_attr attr = { + .file = "./fexit_test.o", + }; + + char prog_name[] = "fexit/bpf_fentry_testX"; + struct bpf_object *obj = NULL, *pkt_obj; + int err, pkt_fd, kfree_skb_fd, i; + struct bpf_link *link[6] = {}; + struct bpf_program *prog[6]; + __u32 duration, retval; + struct bpf_map *data_map; + const int zero = 0; + u64 result[6]; + + err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS, + &pkt_obj, &pkt_fd); + if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno)) + return; + err = bpf_prog_load_xattr(&attr, &obj, &kfree_skb_fd); + if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno)) + goto close_prog; + + for (i = 0; i < 6; i++) { + prog_name[sizeof(prog_name) - 2] = '1' + i; + prog[i] = bpf_object__find_program_by_title(obj, prog_name); + if (CHECK(!prog[i], "find_prog", "prog %s not found\n", prog_name)) + goto close_prog; + link[i] = bpf_program__attach_trace(prog[i]); + if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n")) + goto close_prog; + } + data_map = bpf_object__find_map_by_name(obj, "fexit_te.bss"); + if (CHECK(!data_map, "find_data_map", "data map not found\n")) + goto close_prog; + + err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6), + NULL, NULL, &retval, &duration); + CHECK(err || retval, "ipv6", + "err %d errno %d retval %d duration %d\n", + err, errno, retval, duration); + + err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, &result); + if (CHECK(err, "get_result", + "failed to get output data: %d\n", err)) + goto close_prog; + + for (i = 0; i < 6; i++) + if (CHECK(result[i] != 1, "result", "bpf_fentry_test%d failed err %ld\n", + i + 1, result[i])) + goto close_prog; + +close_prog: + for (i = 0; i < 6; i++) + if (!IS_ERR_OR_NULL(link[i])) + bpf_link__destroy(link[i]); + bpf_object__close(obj); + bpf_object__close(pkt_obj); +} diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c index 55d36856e621..7507c8f689bc 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c +++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c @@ -60,15 +60,17 @@ void test_kfree_skb(void) .file = "./kfree_skb.o", }; + struct bpf_link *link = NULL, *link_fentry = NULL, *link_fexit = NULL; + struct bpf_map *perf_buf_map, *global_data; + struct bpf_program *prog, *fentry, *fexit; struct bpf_object *obj, *obj2 = NULL; struct perf_buffer_opts pb_opts = {}; struct perf_buffer *pb = NULL; - struct bpf_link *link = NULL; - struct bpf_map *perf_buf_map; - struct bpf_program *prog; int err, kfree_skb_fd; bool passed = false; __u32 duration = 0; + const int zero = 0; + bool test_ok[2]; err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &tattr.prog_fd); @@ -82,9 +84,28 @@ void test_kfree_skb(void) prog = bpf_object__find_program_by_title(obj2, "tp_btf/kfree_skb"); if (CHECK(!prog, "find_prog", "prog kfree_skb not found\n")) goto close_prog; + fentry = bpf_object__find_program_by_title(obj2, "fentry/eth_type_trans"); + if (CHECK(!fentry, "find_prog", "prog eth_type_trans not found\n")) + goto close_prog; + fexit = bpf_object__find_program_by_title(obj2, "fexit/eth_type_trans"); + if (CHECK(!fexit, "find_prog", "prog eth_type_trans not found\n")) + goto close_prog; + + global_data = bpf_object__find_map_by_name(obj2, "kfree_sk.bss"); + if (CHECK(!global_data, "find global data", "not found\n")) + goto close_prog; + link = bpf_program__attach_raw_tracepoint(prog, NULL); if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link))) goto close_prog; + link_fentry = bpf_program__attach_trace(fentry); + if (CHECK(IS_ERR(link_fentry), "attach fentry", "err %ld\n", + PTR_ERR(link_fentry))) + goto close_prog; + link_fexit = bpf_program__attach_trace(fexit); + if (CHECK(IS_ERR(link_fexit), "attach fexit", "err %ld\n", + PTR_ERR(link_fexit))) + goto close_prog; perf_buf_map = bpf_object__find_map_by_name(obj2, "perf_buf_map"); if (CHECK(!perf_buf_map, "find_perf_buf_map", "not found\n")) @@ -108,14 +129,26 @@ void test_kfree_skb(void) err = perf_buffer__poll(pb, 100); if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err)) goto close_prog; + /* make sure kfree_skb program was triggered * and it sent expected skb into ring buffer */ CHECK_FAIL(!passed); + + err = bpf_map_lookup_elem(bpf_map__fd(global_data), &zero, test_ok); + if (CHECK(err, "get_result", + "failed to get output data: %d\n", err)) + goto close_prog; + + CHECK_FAIL(!test_ok[0] || !test_ok[1]); close_prog: perf_buffer__free(pb); if (!IS_ERR_OR_NULL(link)) bpf_link__destroy(link); + if (!IS_ERR_OR_NULL(link_fentry)) + bpf_link__destroy(link_fentry); + if (!IS_ERR_OR_NULL(link_fexit)) + bpf_link__destroy(link_fexit); bpf_object__close(obj); bpf_object__close(obj2); } diff --git a/tools/testing/selftests/bpf/progs/fentry_test.c b/tools/testing/selftests/bpf/progs/fentry_test.c new file mode 100644 index 000000000000..545788bf8d50 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/fentry_test.c @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ +#include <linux/bpf.h> +#include "bpf_helpers.h" + +char _license[] SEC("license") = "GPL"; + +struct test1 { + ks32 a; +}; +static volatile __u64 test1_result; +SEC("fentry/bpf_fentry_test1") +int test1(struct test1 *ctx) +{ + test1_result = ctx->a == 1; + return 0; +} + +struct test2 { + ks32 a; + ku64 b; +}; +static volatile __u64 test2_result; +SEC("fentry/bpf_fentry_test2") +int test2(struct test2 *ctx) +{ + test2_result = ctx->a == 2 && ctx->b == 3; + return 0; +} + +struct test3 { + ks8 a; + ks32 b; + ku64 c; +}; +static volatile __u64 test3_result; +SEC("fentry/bpf_fentry_test3") +int test3(struct test3 *ctx) +{ + test3_result = ctx->a == 4 && ctx->b == 5 && ctx->c == 6; + return 0; +} + +struct test4 { + void *a; + ks8 b; + ks32 c; + ku64 d; +}; +static volatile __u64 test4_result; +SEC("fentry/bpf_fentry_test4") +int test4(struct test4 *ctx) +{ + test4_result = ctx->a == (void *)7 && ctx->b == 8 && ctx->c == 9 && + ctx->d == 10; + return 0; +} + +struct test5 { + ku64 a; + void *b; + ks16 c; + ks32 d; + ku64 e; +}; +static volatile __u64 test5_result; +SEC("fentry/bpf_fentry_test5") +int test5(struct test5 *ctx) +{ + test5_result = ctx->a == 11 && ctx->b == (void *)12 && ctx->c == 13 && + ctx->d == 14 && ctx->e == 15; + return 0; +} + +struct test6 { + ku64 a; + void *b; + ks16 c; + ks32 d; + void *e; + ks64 f; +}; +static volatile __u64 test6_result; +SEC("fentry/bpf_fentry_test6") +int test6(struct test6 *ctx) +{ + test6_result = ctx->a == 16 && ctx->b == (void *)17 && ctx->c == 18 && + ctx->d == 19 && ctx->e == (void *)20 && ctx->f == 21; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c new file mode 100644 index 000000000000..981f0474da5a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ +#include <linux/bpf.h> +#include "bpf_helpers.h" + +struct sk_buff { + unsigned int len; +}; + +struct args { + struct sk_buff *skb; + ks32 ret; +}; +static volatile __u64 test_result; +SEC("fexit/test_pkt_access") +int test_main(struct args *ctx) +{ + struct sk_buff *skb = ctx->skb; + int len; + + __builtin_preserve_access_index(({ + len = skb->len; + })); + if (len != 74 || ctx->ret != 0) + return 0; + test_result = 1; + return 0; +} + +struct args_subprog1 { + struct sk_buff *skb; + ks32 ret; +}; +static volatile __u64 test_result_subprog1; +SEC("fexit/test_pkt_access_subprog1") +int test_subprog1(struct args_subprog1 *ctx) +{ + struct sk_buff *skb = ctx->skb; + int len; + + __builtin_preserve_access_index(({ + len = skb->len; + })); + if (len != 74 || ctx->ret != 148) + return 0; + test_result_subprog1 = 1; + return 0; +} + +/* Though test_pkt_access_subprog2() is defined in C as: + * static __attribute__ ((noinline)) + * int test_pkt_access_subprog2(int val, volatile struct __sk_buff *skb) + * { + * return skb->len * val; + * } + * llvm optimizations remove 'int val' argument and generate BPF assembly: + * r0 = *(u32 *)(r1 + 0) + * w0 <<= 1 + * exit + * In such case the verifier falls back to conservative and + * tracing program can access arguments and return value as u64 + * instead of accurate types. + */ +struct args_subprog2 { + ku64 args[5]; + ku64 ret; +}; +static volatile __u64 test_result_subprog2; +SEC("fexit/test_pkt_access_subprog2") +int test_subprog2(struct args_subprog2 *ctx) +{ + struct sk_buff *skb = (void *)ctx->args[0]; + __u64 ret; + int len; + + bpf_probe_read_kernel(&len, sizeof(len), + __builtin_preserve_access_index(&skb->len)); + + ret = ctx->ret; + /* bpf_prog_load() loads "test_pkt_access.o" with BPF_F_TEST_RND_HI32 + * which randomizes upper 32 bits after BPF_ALU32 insns. + * Hence after 'w0 <<= 1' upper bits of $rax are random. + * That is expected and correct. Trim them. + */ + ret = (__u32) ret; + if (len != 74 || ret != 148) + return 0; + test_result_subprog2 = 1; + return 0; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/fexit_test.c b/tools/testing/selftests/bpf/progs/fexit_test.c new file mode 100644 index 000000000000..8b98b1a51784 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/fexit_test.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ +#include <linux/bpf.h> +#include "bpf_helpers.h" + +char _license[] SEC("license") = "GPL"; + +struct test1 { + ks32 a; + ks32 ret; +}; +static volatile __u64 test1_result; +SEC("fexit/bpf_fentry_test1") +int test1(struct test1 *ctx) +{ + test1_result = ctx->a == 1 && ctx->ret == 2; + return 0; +} + +struct test2 { + ks32 a; + ku64 b; + ks32 ret; +}; +static volatile __u64 test2_result; +SEC("fexit/bpf_fentry_test2") +int test2(struct test2 *ctx) +{ + test2_result = ctx->a == 2 && ctx->b == 3 && ctx->ret == 5; + return 0; +} + +struct test3 { + ks8 a; + ks32 b; + ku64 c; + ks32 ret; +}; +static volatile __u64 test3_result; +SEC("fexit/bpf_fentry_test3") +int test3(struct test3 *ctx) +{ + test3_result = ctx->a == 4 && ctx->b == 5 && ctx->c == 6 && + ctx->ret == 15; + return 0; +} + +struct test4 { + void *a; + ks8 b; + ks32 c; + ku64 d; + ks32 ret; +}; +static volatile __u64 test4_result; +SEC("fexit/bpf_fentry_test4") +int test4(struct test4 *ctx) +{ + test4_result = ctx->a == (void *)7 && ctx->b == 8 && ctx->c == 9 && + ctx->d == 10 && ctx->ret == 34; + return 0; +} + +struct test5 { + ku64 a; + void *b; + ks16 c; + ks32 d; + ku64 e; + ks32 ret; +}; +static volatile __u64 test5_result; +SEC("fexit/bpf_fentry_test5") +int test5(struct test5 *ctx) +{ + test5_result = ctx->a == 11 && ctx->b == (void *)12 && ctx->c == 13 && + ctx->d == 14 && ctx->e == 15 && ctx->ret == 65; + return 0; +} + +struct test6 { + ku64 a; + void *b; + ks16 c; + ks32 d; + void *e; + ks64 f; + ks32 ret; +}; +static volatile __u64 test6_result; +SEC("fexit/bpf_fentry_test6") +int test6(struct test6 *ctx) +{ + test6_result = ctx->a == 16 && ctx->b == (void *)17 && ctx->c == 18 && + ctx->d == 19 && ctx->e == (void *)20 && ctx->f == 21 && + ctx->ret == 111; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/kfree_skb.c b/tools/testing/selftests/bpf/progs/kfree_skb.c index f769fdbf6725..dcc9feac8338 100644 --- a/tools/testing/selftests/bpf/progs/kfree_skb.c +++ b/tools/testing/selftests/bpf/progs/kfree_skb.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Facebook #include <linux/bpf.h> +#include <stdbool.h> #include "bpf_helpers.h" #include "bpf_endian.h" @@ -116,3 +117,54 @@ int trace_kfree_skb(struct trace_kfree_skb *ctx) &meta, sizeof(meta)); return 0; } + +static volatile struct { + bool fentry_test_ok; + bool fexit_test_ok; +} result; + +struct eth_type_trans_args { + struct sk_buff *skb; + struct net_device *dev; + unsigned short protocol; /* return value available to fexit progs */ +}; + +SEC("fentry/eth_type_trans") +int fentry_eth_type_trans(struct eth_type_trans_args *ctx) +{ + struct sk_buff *skb = ctx->skb; + struct net_device *dev = ctx->dev; + int len, ifindex; + + __builtin_preserve_access_index(({ + len = skb->len; + ifindex = dev->ifindex; + })); + + /* fentry sees full packet including L2 header */ + if (len != 74 || ifindex != 1) + return 0; + result.fentry_test_ok = true; + return 0; +} + +SEC("fexit/eth_type_trans") +int fexit_eth_type_trans(struct eth_type_trans_args *ctx) +{ + struct sk_buff *skb = ctx->skb; + struct net_device *dev = ctx->dev; + int len, ifindex; + + __builtin_preserve_access_index(({ + len = skb->len; + ifindex = dev->ifindex; + })); + + /* fexit sees packet without L2 header that eth_type_trans should have + * consumed. + */ + if (len != 60 || ctx->protocol != bpf_htons(0x86dd) || ifindex != 1) + return 0; + result.fexit_test_ok = true; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_pkt_access.c b/tools/testing/selftests/bpf/progs/test_pkt_access.c index 7cf42d14103f..3a7b4b607ed3 100644 --- a/tools/testing/selftests/bpf/progs/test_pkt_access.c +++ b/tools/testing/selftests/bpf/progs/test_pkt_access.c @@ -17,8 +17,38 @@ #define barrier() __asm__ __volatile__("": : :"memory") int _version SEC("version") = 1; -SEC("test1") -int process(struct __sk_buff *skb) +/* llvm will optimize both subprograms into exactly the same BPF assembly + * + * Disassembly of section .text: + * + * 0000000000000000 test_pkt_access_subprog1: + * ; return skb->len * 2; + * 0: 61 10 00 00 00 00 00 00 r0 = *(u32 *)(r1 + 0) + * 1: 64 00 00 00 01 00 00 00 w0 <<= 1 + * 2: 95 00 00 00 00 00 00 00 exit + * + * 0000000000000018 test_pkt_access_subprog2: + * ; return skb->len * val; + * 3: 61 10 00 00 00 00 00 00 r0 = *(u32 *)(r1 + 0) + * 4: 64 00 00 00 01 00 00 00 w0 <<= 1 + * 5: 95 00 00 00 00 00 00 00 exit + * + * Which makes it an interesting test for BTF-enabled verifier. + */ +static __attribute__ ((noinline)) +int test_pkt_access_subprog1(volatile struct __sk_buff *skb) +{ + return skb->len * 2; +} + +static __attribute__ ((noinline)) +int test_pkt_access_subprog2(int val, volatile struct __sk_buff *skb) +{ + return skb->len * val; +} + +SEC("classifier/test_pkt_access") +int test_pkt_access(struct __sk_buff *skb) { void *data_end = (void *)(long)skb->data_end; void *data = (void *)(long)skb->data; @@ -48,6 +78,10 @@ int process(struct __sk_buff *skb) tcp = (struct tcphdr *)((void *)(ip6h) + ihl_len); } + if (test_pkt_access_subprog1(skb) != skb->len * 2) + return TC_ACT_SHOT; + if (test_pkt_access_subprog2(2, skb) != skb->len * 2) + return TC_ACT_SHOT; if (tcp) { if (((void *)(tcp) + 20) > data_end || proto != 6) return TC_ACT_SHOT; |