 arch/x86/net/bpf_jit_comp.c | 424
 include/linux/bpf.h | 138
 include/linux/bpf_types.h | 78
 include/linux/bpf_verifier.h | 1
 include/linux/btf.h | 1
 include/uapi/linux/bpf.h | 3
 kernel/bpf/Makefile | 1
 kernel/bpf/btf.c | 363
 kernel/bpf/core.c | 9
 kernel/bpf/syscall.c | 77
 kernel/bpf/trampoline.c | 253
 kernel/bpf/verifier.c | 131
 net/bpf/test_run.c | 41
 net/core/filter.c | 12
 tools/include/uapi/linux/bpf.h | 3
 tools/lib/bpf/bpf.c | 8
 tools/lib/bpf/bpf.h | 5
 tools/lib/bpf/bpf_helpers.h | 13
 tools/lib/bpf/btf.c | 22
 tools/lib/bpf/btf.h | 2
 tools/lib/bpf/libbpf.c | 154
 tools/lib/bpf/libbpf.h | 7
 tools/lib/bpf/libbpf.map | 3
 tools/testing/selftests/bpf/prog_tests/fentry_fexit.c | 90
 tools/testing/selftests/bpf/prog_tests/fentry_test.c | 64
 tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c | 76
 tools/testing/selftests/bpf/prog_tests/fexit_stress.c | 76
 tools/testing/selftests/bpf/prog_tests/fexit_test.c | 64
 tools/testing/selftests/bpf/prog_tests/kfree_skb.c | 39
 tools/testing/selftests/bpf/progs/fentry_test.c | 90
 tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c | 91
 tools/testing/selftests/bpf/progs/fexit_test.c | 98
 tools/testing/selftests/bpf/progs/kfree_skb.c | 52
 tools/testing/selftests/bpf/progs/test_pkt_access.c | 38
 34 files changed, 2354 insertions(+), 173 deletions(-)
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 8cd23d8309bf..2e586f579945 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -9,9 +9,11 @@
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <linux/bpf.h>
+#include <linux/memory.h>
#include <asm/extable.h>
#include <asm/set_memory.h>
#include <asm/nospec-branch.h>
+#include <asm/text-patching.h>
static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
{
@@ -96,6 +98,7 @@ static int bpf_size_to_x86_bytes(int bpf_size)
/* Pick a register outside of BPF range for JIT internal work */
#define AUX_REG (MAX_BPF_JIT_REG + 1)
+#define X86_REG_R9 (MAX_BPF_JIT_REG + 2)
/*
* The following table maps BPF registers to x86-64 registers.
@@ -104,8 +107,8 @@ static int bpf_size_to_x86_bytes(int bpf_size)
* register in load/store instructions, it always needs an
* extra byte of encoding and is callee saved.
*
- * Also x86-64 register R9 is unused. x86-64 register R10 is
- * used for blinding (if enabled).
+ * x86-64 register R9 is not used by BPF programs, but can be used by BPF
+ * trampoline. x86-64 register R10 is used for blinding (if enabled).
*/
static const int reg2hex[] = {
[BPF_REG_0] = 0, /* RAX */
@@ -121,6 +124,7 @@ static const int reg2hex[] = {
[BPF_REG_FP] = 5, /* RBP readonly */
[BPF_REG_AX] = 2, /* R10 temp register */
[AUX_REG] = 3, /* R11 temp register */
+ [X86_REG_R9] = 1, /* R9 register, 6th function argument */
};
static const int reg2pt_regs[] = {
@@ -148,6 +152,7 @@ static bool is_ereg(u32 reg)
BIT(BPF_REG_7) |
BIT(BPF_REG_8) |
BIT(BPF_REG_9) |
+ BIT(X86_REG_R9) |
BIT(BPF_REG_AX));
}
@@ -198,8 +203,10 @@ struct jit_context {
/* Maximum number of bytes emitted while JITing one eBPF insn */
#define BPF_MAX_INSN_SIZE 128
#define BPF_INSN_SAFETY 64
+/* Number of bytes emit_call() needs to generate a call instruction */
+#define X86_CALL_SIZE 5
-#define PROLOGUE_SIZE 20
+#define PROLOGUE_SIZE 25
/*
* Emit x86-64 prologue code for BPF program and check its size.
@@ -208,8 +215,13 @@ struct jit_context {
static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
{
u8 *prog = *pprog;
- int cnt = 0;
+ int cnt = X86_CALL_SIZE;
+ /* BPF trampoline can be made to work without these nops,
+ * but let's waste 5 bytes for now and optimize later
+ */
+ memcpy(prog, ideal_nops[NOP_ATOMIC5], cnt);
+ prog += cnt;
EMIT1(0x55); /* push rbp */
EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
/* sub rsp, rounded_stack_depth */
@@ -390,6 +402,149 @@ static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
*pprog = prog;
}
+/* LDX: dst_reg = *(u8*)(src_reg + off) */
+static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ switch (size) {
+ case BPF_B:
+ /* Emit 'movzx rax, byte ptr [rax + off]' */
+ EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
+ break;
+ case BPF_H:
+ /* Emit 'movzx rax, word ptr [rax + off]' */
+ EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
+ break;
+ case BPF_W:
+ /* Emit 'mov eax, dword ptr [rax+0x14]' */
+ if (is_ereg(dst_reg) || is_ereg(src_reg))
+ EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
+ else
+ EMIT1(0x8B);
+ break;
+ case BPF_DW:
+ /* Emit 'mov rax, qword ptr [rax+0x14]' */
+ EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
+ break;
+ }
+ /*
+ * If insn->off == 0 we can save one extra byte, but
+ * special case of x86 R13 which always needs an offset
+ * is not worth the hassle
+ */
+ if (is_imm8(off))
+ EMIT2(add_2reg(0x40, src_reg, dst_reg), off);
+ else
+ EMIT1_off32(add_2reg(0x80, src_reg, dst_reg), off);
+ *pprog = prog;
+}
+
+/* STX: *(u8*)(dst_reg + off) = src_reg */
+static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ switch (size) {
+ case BPF_B:
+ /* Emit 'mov byte ptr [rax + off], al' */
+ if (is_ereg(dst_reg) || is_ereg(src_reg) ||
+ /* We have to add extra byte for x86 SIL, DIL regs */
+ src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
+ EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
+ else
+ EMIT1(0x88);
+ break;
+ case BPF_H:
+ if (is_ereg(dst_reg) || is_ereg(src_reg))
+ EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89);
+ else
+ EMIT2(0x66, 0x89);
+ break;
+ case BPF_W:
+ if (is_ereg(dst_reg) || is_ereg(src_reg))
+ EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89);
+ else
+ EMIT1(0x89);
+ break;
+ case BPF_DW:
+ EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89);
+ break;
+ }
+ if (is_imm8(off))
+ EMIT2(add_2reg(0x40, dst_reg, src_reg), off);
+ else
+ EMIT1_off32(add_2reg(0x80, dst_reg, src_reg), off);
+ *pprog = prog;
+}
+
+static int emit_call(u8 **pprog, void *func, void *ip)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ s64 offset;
+
+ offset = func - (ip + X86_CALL_SIZE);
+ if (!is_simm32(offset)) {
+ pr_err("Target call %p is out of range\n", func);
+ return -EINVAL;
+ }
+ EMIT1_off32(0xE8, offset);
+ *pprog = prog;
+ return 0;
+}
+
+int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
+ void *old_addr, void *new_addr)
+{
+ u8 old_insn[X86_CALL_SIZE] = {};
+ u8 new_insn[X86_CALL_SIZE] = {};
+ u8 *prog;
+ int ret;
+
+ if (!is_kernel_text((long)ip) &&
+ !is_bpf_text_address((long)ip))
+ /* BPF trampoline in modules is not supported */
+ return -EINVAL;
+
+ if (old_addr) {
+ prog = old_insn;
+ ret = emit_call(&prog, old_addr, (void *)ip);
+ if (ret)
+ return ret;
+ }
+ if (new_addr) {
+ prog = new_insn;
+ ret = emit_call(&prog, new_addr, (void *)ip);
+ if (ret)
+ return ret;
+ }
+ ret = -EBUSY;
+ mutex_lock(&text_mutex);
+ switch (t) {
+ case BPF_MOD_NOP_TO_CALL:
+ if (memcmp(ip, ideal_nops[NOP_ATOMIC5], X86_CALL_SIZE))
+ goto out;
+ text_poke_bp(ip, new_insn, X86_CALL_SIZE, NULL);
+ break;
+ case BPF_MOD_CALL_TO_CALL:
+ if (memcmp(ip, old_insn, X86_CALL_SIZE))
+ goto out;
+ text_poke_bp(ip, new_insn, X86_CALL_SIZE, NULL);
+ break;
+ case BPF_MOD_CALL_TO_NOP:
+ if (memcmp(ip, old_insn, X86_CALL_SIZE))
+ goto out;
+ text_poke_bp(ip, ideal_nops[NOP_ATOMIC5], X86_CALL_SIZE, NULL);
+ break;
+ }
+ ret = 0;
+out:
+ mutex_unlock(&text_mutex);
+ return ret;
+}
static bool ex_handler_bpf(const struct exception_table_entry *x,
struct pt_regs *regs, int trapnr,
@@ -773,68 +928,22 @@ st: if (is_imm8(insn->off))
/* STX: *(u8*)(dst_reg + off) = src_reg */
case BPF_STX | BPF_MEM | BPF_B:
- /* Emit 'mov byte ptr [rax + off], al' */
- if (is_ereg(dst_reg) || is_ereg(src_reg) ||
- /* We have to add extra byte for x86 SIL, DIL regs */
- src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
- EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
- else
- EMIT1(0x88);
- goto stx;
case BPF_STX | BPF_MEM | BPF_H:
- if (is_ereg(dst_reg) || is_ereg(src_reg))
- EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89);
- else
- EMIT2(0x66, 0x89);
- goto stx;
case BPF_STX | BPF_MEM | BPF_W:
- if (is_ereg(dst_reg) || is_ereg(src_reg))
- EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89);
- else
- EMIT1(0x89);
- goto stx;
case BPF_STX | BPF_MEM | BPF_DW:
- EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89);
-stx: if (is_imm8(insn->off))
- EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off);
- else
- EMIT1_off32(add_2reg(0x80, dst_reg, src_reg),
- insn->off);
+ emit_stx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
break;
/* LDX: dst_reg = *(u8*)(src_reg + off) */
case BPF_LDX | BPF_MEM | BPF_B:
case BPF_LDX | BPF_PROBE_MEM | BPF_B:
- /* Emit 'movzx rax, byte ptr [rax + off]' */
- EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
- goto ldx;
case BPF_LDX | BPF_MEM | BPF_H:
case BPF_LDX | BPF_PROBE_MEM | BPF_H:
- /* Emit 'movzx rax, word ptr [rax + off]' */
- EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
- goto ldx;
case BPF_LDX | BPF_MEM | BPF_W:
case BPF_LDX | BPF_PROBE_MEM | BPF_W:
- /* Emit 'mov eax, dword ptr [rax+0x14]' */
- if (is_ereg(dst_reg) || is_ereg(src_reg))
- EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
- else
- EMIT1(0x8B);
- goto ldx;
case BPF_LDX | BPF_MEM | BPF_DW:
case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
- /* Emit 'mov rax, qword ptr [rax+0x14]' */
- EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
-ldx: /*
- * If insn->off == 0 we can save one extra byte, but
- * special case of x86 R13 which always needs an offset
- * is not worth the hassle
- */
- if (is_imm8(insn->off))
- EMIT2(add_2reg(0x40, src_reg, dst_reg), insn->off);
- else
- EMIT1_off32(add_2reg(0x80, src_reg, dst_reg),
- insn->off);
+ emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
if (BPF_MODE(insn->code) == BPF_PROBE_MEM) {
struct exception_table_entry *ex;
u8 *_insn = image + proglen;
@@ -899,13 +1008,8 @@ xadd: if (is_imm8(insn->off))
/* call */
case BPF_JMP | BPF_CALL:
func = (u8 *) __bpf_call_base + imm32;
- jmp_offset = func - (image + addrs[i]);
- if (!imm32 || !is_simm32(jmp_offset)) {
- pr_err("unsupported BPF func %d addr %p image %p\n",
- imm32, func, image);
+ if (!imm32 || emit_call(&prog, func, image + addrs[i - 1]))
return -EINVAL;
- }
- EMIT1_off32(0xE8, jmp_offset);
break;
case BPF_JMP | BPF_TAIL_CALL:
@@ -1138,6 +1242,210 @@ emit_jmp:
return proglen;
}
+static void save_regs(struct btf_func_model *m, u8 **prog, int nr_args,
+ int stack_size)
+{
+ int i;
+ /* Store function arguments to stack.
+ * For a function that accepts two pointers the sequence will be:
+ * mov QWORD PTR [rbp-0x10],rdi
+ * mov QWORD PTR [rbp-0x8],rsi
+ */
+ for (i = 0; i < min(nr_args, 6); i++)
+ emit_stx(prog, bytes_to_bpf_size(m->arg_size[i]),
+ BPF_REG_FP,
+ i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
+ -(stack_size - i * 8));
+}
+
+static void restore_regs(struct btf_func_model *m, u8 **prog, int nr_args,
+ int stack_size)
+{
+ int i;
+
+ /* Restore function arguments from stack.
+ * For a function that accepts two pointers the sequence will be:
+ * EMIT4(0x48, 0x8B, 0x7D, 0xF0); mov rdi,QWORD PTR [rbp-0x10]
+ * EMIT4(0x48, 0x8B, 0x75, 0xF8); mov rsi,QWORD PTR [rbp-0x8]
+ */
+ for (i = 0; i < min(nr_args, 6); i++)
+ emit_ldx(prog, bytes_to_bpf_size(m->arg_size[i]),
+ i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
+ BPF_REG_FP,
+ -(stack_size - i * 8));
+}
+
+static int invoke_bpf(struct btf_func_model *m, u8 **pprog,
+ struct bpf_prog **progs, int prog_cnt, int stack_size)
+{
+ u8 *prog = *pprog;
+ int cnt = 0, i;
+
+ for (i = 0; i < prog_cnt; i++) {
+ if (emit_call(&prog, __bpf_prog_enter, prog))
+ return -EINVAL;
+ /* remember prog start time returned by __bpf_prog_enter */
+ emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
+
+ /* arg1: lea rdi, [rbp - stack_size] */
+ EMIT4(0x48, 0x8D, 0x7D, -stack_size);
+ /* arg2: progs[i]->insnsi for interpreter */
+ if (!progs[i]->jited)
+ emit_mov_imm64(&prog, BPF_REG_2,
+ (long) progs[i]->insnsi >> 32,
+ (u32) (long) progs[i]->insnsi);
+ /* call JITed bpf program or interpreter */
+ if (emit_call(&prog, progs[i]->bpf_func, prog))
+ return -EINVAL;
+
+ /* arg1: mov rdi, progs[i] */
+ emit_mov_imm64(&prog, BPF_REG_1, (long) progs[i] >> 32,
+ (u32) (long) progs[i]);
+ /* arg2: mov rsi, rbx <- start time in nsec */
+ emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
+ if (emit_call(&prog, __bpf_prog_exit, prog))
+ return -EINVAL;
+ }
+ *pprog = prog;
+ return 0;
+}
+
+/* Example:
+ * __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
+ * its 'struct btf_func_model' will be nr_args=2
+ * The assembly code when eth_type_trans is executing after trampoline:
+ *
+ * push rbp
+ * mov rbp, rsp
+ * sub rsp, 16 // space for skb and dev
+ * push rbx // temp regs to pass start time
+ * mov qword ptr [rbp - 16], rdi // save skb pointer to stack
+ * mov qword ptr [rbp - 8], rsi // save dev pointer to stack
+ * call __bpf_prog_enter // rcu_read_lock and preempt_disable
+ * mov rbx, rax // remember start time if bpf stats are enabled
+ * lea rdi, [rbp - 16] // R1==ctx of bpf prog
+ * call addr_of_jited_FENTRY_prog
+ * movabsq rdi, 64bit_addr_of_struct_bpf_prog // unused if bpf stats are off
+ * mov rsi, rbx // prog start time
+ * call __bpf_prog_exit // rcu_read_unlock, preempt_enable and stats math
+ * mov rdi, qword ptr [rbp - 16] // restore skb pointer from stack
+ * mov rsi, qword ptr [rbp - 8] // restore dev pointer from stack
+ * pop rbx
+ * leave
+ * ret
+ *
+ * eth_type_trans has a 5-byte nop at the beginning. These 5 bytes will be
+ * replaced with 'call generated_bpf_trampoline'. When it returns
+ * eth_type_trans will continue executing with original skb and dev pointers.
+ *
+ * The assembly code when eth_type_trans is called from trampoline:
+ *
+ * push rbp
+ * mov rbp, rsp
+ * sub rsp, 24 // space for skb, dev, return value
+ * push rbx // temp regs to pass start time
+ * mov qword ptr [rbp - 24], rdi // save skb pointer to stack
+ * mov qword ptr [rbp - 16], rsi // save dev pointer to stack
+ * call __bpf_prog_enter // rcu_read_lock and preempt_disable
+ * mov rbx, rax // remember start time if bpf stats are enabled
+ * lea rdi, [rbp - 24] // R1==ctx of bpf prog
+ * call addr_of_jited_FENTRY_prog // bpf prog can access skb and dev
+ * movabsq rdi, 64bit_addr_of_struct_bpf_prog // unused if bpf stats are off
+ * mov rsi, rbx // prog start time
+ * call __bpf_prog_exit // rcu_read_unlock, preempt_enable and stats math
+ * mov rdi, qword ptr [rbp - 24] // restore skb pointer from stack
+ * mov rsi, qword ptr [rbp - 16] // restore dev pointer from stack
+ * call eth_type_trans+5 // execute body of eth_type_trans
+ * mov qword ptr [rbp - 8], rax // save return value
+ * call __bpf_prog_enter // rcu_read_lock and preempt_disable
+ * mov rbx, rax // remember start time if bpf stats are enabled
+ * lea rdi, [rbp - 24] // R1==ctx of bpf prog
+ * call addr_of_jited_FEXIT_prog // bpf prog can access skb, dev, return value
+ * movabsq rdi, 64bit_addr_of_struct_bpf_prog // unused if bpf stats are off
+ * mov rsi, rbx // prog start time
+ * call __bpf_prog_exit // rcu_read_unlock, preempt_enable and stats math
+ * mov rax, qword ptr [rbp - 8] // restore eth_type_trans's return value
+ * pop rbx
+ * leave
+ * add rsp, 8 // skip eth_type_trans's frame
+ * ret // return to its caller
+ */
+int arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags,
+ struct bpf_prog **fentry_progs, int fentry_cnt,
+ struct bpf_prog **fexit_progs, int fexit_cnt,
+ void *orig_call)
+{
+ int cnt = 0, nr_args = m->nr_args;
+ int stack_size = nr_args * 8;
+ u8 *prog;
+
+ /* x86-64 supports up to 6 arguments. 7+ can be added in the future */
+ if (nr_args > 6)
+ return -ENOTSUPP;
+
+ if ((flags & BPF_TRAMP_F_RESTORE_REGS) &&
+ (flags & BPF_TRAMP_F_SKIP_FRAME))
+ return -EINVAL;
+
+ if (flags & BPF_TRAMP_F_CALL_ORIG)
+ stack_size += 8; /* room for return value of orig_call */
+
+ if (flags & BPF_TRAMP_F_SKIP_FRAME)
+ /* skip patched call instruction and point orig_call to actual
+ * body of the kernel function.
+ */
+ orig_call += X86_CALL_SIZE;
+
+ prog = image;
+
+ EMIT1(0x55); /* push rbp */
+ EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
+ EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */
+ EMIT1(0x53); /* push rbx */
+
+ save_regs(m, &prog, nr_args, stack_size);
+
+ if (fentry_cnt)
+ if (invoke_bpf(m, &prog, fentry_progs, fentry_cnt, stack_size))
+ return -EINVAL;
+
+ if (flags & BPF_TRAMP_F_CALL_ORIG) {
+ if (fentry_cnt)
+ restore_regs(m, &prog, nr_args, stack_size);
+
+ /* call original function */
+ if (emit_call(&prog, orig_call, prog))
+ return -EINVAL;
+ /* remember return value in a stack for bpf prog to access */
+ emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
+ }
+
+ if (fexit_cnt)
+ if (invoke_bpf(m, &prog, fexit_progs, fexit_cnt, stack_size))
+ return -EINVAL;
+
+ if (flags & BPF_TRAMP_F_RESTORE_REGS)
+ restore_regs(m, &prog, nr_args, stack_size);
+
+ if (flags & BPF_TRAMP_F_CALL_ORIG)
+ /* restore original return value back into RAX */
+ emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
+
+ EMIT1(0x5B); /* pop rbx */
+ EMIT1(0xC9); /* leave */
+ if (flags & BPF_TRAMP_F_SKIP_FRAME)
+ /* skip our return address and return to parent */
+ EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
+ EMIT1(0xC3); /* ret */
+ /* One half of the page holds the active, running trampoline.
+ * The other half is the area where the next trampoline is built.
+ * Make sure the trampoline generation logic doesn't overflow.
+ */
+ if (WARN_ON_ONCE(prog - (u8 *)image > PAGE_SIZE / 2 - BPF_INSN_SAFETY))
+ return -EFAULT;
+ return 0;
+}
+
struct x64_jit_data {
struct bpf_binary_header *header;
int *addrs;
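
For context, a minimal BPF-C sketch (not part of this patch) of a program that would ride the fexit trampoline described in the comment above. It assumes the selftests' bpf_helpers.h for SEC(); the ctx argument is the trampoline's saved-args area, so ctx[0] is skb, ctx[1] is dev and, for fexit, ctx[2] holds eth_type_trans()'s return value:

	#include <linux/bpf.h>
	#include "bpf_helpers.h"

	char _license[] SEC("license") = "GPL";

	__u64 nonzero_protos = 0;

	SEC("fexit/eth_type_trans")
	int trace_eth_type_trans(__u64 *ctx)
	{
		__u64 proto = ctx[2];	/* __be16 return value, zero-extended */

		if (proto)
			nonzero_protos++;
		return 0;
	}

Loaded as BPF_PROG_TYPE_TRACING with expected_attach_type BPF_TRACE_FEXIT, such a program runs from the generated trampoline after the original eth_type_trans() body, as laid out in the second assembly listing above.
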
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 7c7f518811a6..5b81cde47314 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -14,6 +14,8 @@
#include <linux/numa.h>
#include <linux/wait.h>
#include <linux/u64_stats_sync.h>
+#include <linux/refcount.h>
+#include <linux/mutex.h>
struct bpf_verifier_env;
struct bpf_verifier_log;
@@ -246,7 +248,7 @@ struct bpf_func_proto {
};
enum bpf_arg_type arg_type[5];
};
- u32 *btf_id; /* BTF ids of arguments */
+ int *btf_id; /* BTF ids of arguments */
};
/* bpf_context is intentionally undefined structure. Pointer to bpf_context is
@@ -384,6 +386,104 @@ struct bpf_prog_stats {
struct u64_stats_sync syncp;
} __aligned(2 * sizeof(u64));
+struct btf_func_model {
+ u8 ret_size;
+ u8 nr_args;
+ u8 arg_size[MAX_BPF_FUNC_ARGS];
+};
+
+/* Restore arguments before returning from trampoline to let original function
+ * continue executing. This flag is used for fentry progs when there are no
+ * fexit progs.
+ */
+#define BPF_TRAMP_F_RESTORE_REGS BIT(0)
+/* Call original function after fentry progs, but before fexit progs.
+ * Makes sense for fentry/fexit, normal calls and indirect calls.
+ */
+#define BPF_TRAMP_F_CALL_ORIG BIT(1)
+/* Skip current frame and return to parent. Makes sense for fentry/fexit
+ * programs only. Should not be used with normal calls and indirect calls.
+ */
+#define BPF_TRAMP_F_SKIP_FRAME BIT(2)
+
+/* Different use cases for BPF trampoline:
+ * 1. replace nop at the function entry (kprobe equivalent)
+ * flags = BPF_TRAMP_F_RESTORE_REGS
+ * fentry = a set of programs to run before returning from trampoline
+ *
+ * 2. replace nop at the function entry (kprobe + kretprobe equivalent)
+ * flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME
+ * orig_call = fentry_ip + MCOUNT_INSN_SIZE
+ * fentry = a set of program to run before calling original function
+ * fexit = a set of program to run after original function
+ *
+ * 3. replace direct call instruction anywhere in the function body
+ * or assign a function pointer for indirect call (like tcp_congestion_ops->cong_avoid)
+ * With flags = 0
+ * fentry = a set of programs to run before returning from trampoline
+ * With flags = BPF_TRAMP_F_CALL_ORIG
+ * orig_call = original callback addr or direct function addr
+ * fentry = a set of program to run before calling original function
+ * fexit = a set of program to run after original function
+ */
+int arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags,
+ struct bpf_prog **fentry_progs, int fentry_cnt,
+ struct bpf_prog **fexit_progs, int fexit_cnt,
+ void *orig_call);
+/* these two functions are called from generated trampoline */
+u64 notrace __bpf_prog_enter(void);
+void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
+
+enum bpf_tramp_prog_type {
+ BPF_TRAMP_FENTRY,
+ BPF_TRAMP_FEXIT,
+ BPF_TRAMP_MAX
+};
+
+struct bpf_trampoline {
+ /* hlist for trampoline_table */
+ struct hlist_node hlist;
+ /* serializes access to fields of this trampoline */
+ struct mutex mutex;
+ refcount_t refcnt;
+ u64 key;
+ struct {
+ struct btf_func_model model;
+ void *addr;
+ } func;
+ /* list of BPF programs using this trampoline */
+ struct hlist_head progs_hlist[BPF_TRAMP_MAX];
+ /* Number of attached programs. A counter per kind. */
+ int progs_cnt[BPF_TRAMP_MAX];
+ /* Executable image of trampoline */
+ void *image;
+ u64 selector;
+};
+#ifdef CONFIG_BPF_JIT
+struct bpf_trampoline *bpf_trampoline_lookup(u64 key);
+int bpf_trampoline_link_prog(struct bpf_prog *prog);
+int bpf_trampoline_unlink_prog(struct bpf_prog *prog);
+void bpf_trampoline_put(struct bpf_trampoline *tr);
+#else
+static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
+{
+ return NULL;
+}
+static inline int bpf_trampoline_link_prog(struct bpf_prog *prog)
+{
+ return -ENOTSUPP;
+}
+static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
+{
+ return -ENOTSUPP;
+}
+static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {}
+#endif
+
+struct bpf_func_info_aux {
+ bool unreliable;
+};
+
struct bpf_prog_aux {
atomic_t refcnt;
u32 used_map_cnt;
@@ -395,9 +495,14 @@ struct bpf_prog_aux {
u32 func_cnt; /* used by non-func prog as the number of func progs */
u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */
u32 attach_btf_id; /* in-kernel BTF type id to attach to */
+ struct bpf_prog *linked_prog;
bool verifier_zext; /* Zero extensions has been inserted by verifier. */
bool offload_requested;
bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */
+ bool func_proto_unreliable;
+ enum bpf_tramp_prog_type trampoline_prog_type;
+ struct bpf_trampoline *trampoline;
+ struct hlist_node tramp_hlist;
/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
const struct btf_type *attach_func_proto;
/* function name for valid attach_btf_id */
@@ -419,6 +524,7 @@ struct bpf_prog_aux {
struct bpf_prog_offload *offload;
struct btf *btf;
struct bpf_func_info *func_info;
+ struct bpf_func_info_aux *func_info_aux;
/* bpf_line_info loaded from userspace. linfo->insn_off
* has the xlated insn offset.
* Both the main and sub prog share the same linfo.
@@ -648,7 +754,7 @@ DECLARE_PER_CPU(int, bpf_prog_active);
extern const struct file_operations bpf_map_fops;
extern const struct file_operations bpf_prog_fops;
-#define BPF_PROG_TYPE(_id, _name) \
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
extern const struct bpf_prog_ops _name ## _prog_ops; \
extern const struct bpf_verifier_ops _name ## _verifier_ops;
#define BPF_MAP_TYPE(_id, _ops) \
@@ -782,7 +888,16 @@ int btf_struct_access(struct bpf_verifier_log *log,
const struct btf_type *t, int off, int size,
enum bpf_access_type atype,
u32 *next_btf_id);
-u32 btf_resolve_helper_id(struct bpf_verifier_log *log, void *, int);
+int btf_resolve_helper_id(struct bpf_verifier_log *log,
+ const struct bpf_func_proto *fn, int);
+
+int btf_distill_func_proto(struct bpf_verifier_log *log,
+ struct btf *btf,
+ const struct btf_type *func_proto,
+ const char *func_name,
+ struct btf_func_model *m);
+
+int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog);
#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
@@ -1107,6 +1222,15 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
#endif
#ifdef CONFIG_INET
+struct sk_reuseport_kern {
+ struct sk_buff *skb;
+ struct sock *sk;
+ struct sock *selected_sk;
+ void *data_end;
+ u32 hash;
+ u32 reuseport_id;
+ bool bind_inany;
+};
bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
struct bpf_insn_access_aux *info);
@@ -1157,4 +1281,12 @@ static inline u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
}
#endif /* CONFIG_INET */
+enum bpf_text_poke_type {
+ BPF_MOD_NOP_TO_CALL,
+ BPF_MOD_CALL_TO_CALL,
+ BPF_MOD_CALL_TO_NOP,
+};
+int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
+ void *addr1, void *addr2);
+
#endif /* _LINUX_BPF_H */
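
As a worked example of the new struct btf_func_model (a sketch, not taken from the patch): for the eth_type_trans() prototype used as the running example in this series, btf_distill_func_proto() would produce:

	/* __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); */
	struct btf_func_model m = {
		.ret_size = 2,		/* __be16 resolves to a 2-byte integer */
		.nr_args  = 2,
		.arg_size = { 8, 8 },	/* two kernel pointers, sizeof(void *) */
	};

The JIT then uses nr_args and arg_size[] to decide how many argument registers to spill to the trampoline stack and with what store width.
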
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index de14872b01ba..93740b3614d7 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -2,42 +2,68 @@
/* internal file - do not include directly */
#ifdef CONFIG_NET
-BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_FILTER, sk_filter)
-BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_CLS, tc_cls_act)
-BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act)
-BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp)
+BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_FILTER, sk_filter,
+ struct __sk_buff, struct sk_buff)
+BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_CLS, tc_cls_act,
+ struct __sk_buff, struct sk_buff)
+BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act,
+ struct __sk_buff, struct sk_buff)
+BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp,
+ struct xdp_md, struct xdp_buff)
#ifdef CONFIG_CGROUP_BPF
-BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb)
-BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock)
-BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr)
+BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb,
+ struct __sk_buff, struct sk_buff)
+BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock,
+ struct bpf_sock, struct sock)
+BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr,
+ struct bpf_sock_addr, struct bpf_sock_addr_kern)
#endif
-BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_in)
-BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_out)
-BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit)
-BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_SEG6LOCAL, lwt_seg6local)
-BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops)
-BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb)
-BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg)
-BPF_PROG_TYPE(BPF_PROG_TYPE_FLOW_DISSECTOR, flow_dissector)
+BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_in,
+ struct __sk_buff, struct sk_buff)
+BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_out,
+ struct __sk_buff, struct sk_buff)
+BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit,
+ struct __sk_buff, struct sk_buff)
+BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_SEG6LOCAL, lwt_seg6local,
+ struct __sk_buff, struct sk_buff)
+BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops,
+ struct bpf_sock_ops, struct bpf_sock_ops_kern)
+BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb,
+ struct __sk_buff, struct sk_buff)
+BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg,
+ struct sk_msg_md, struct sk_msg)
+BPF_PROG_TYPE(BPF_PROG_TYPE_FLOW_DISSECTOR, flow_dissector,
+ struct __sk_buff, struct bpf_flow_dissector)
#endif
#ifdef CONFIG_BPF_EVENTS
-BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe)
-BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint)
-BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event)
-BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
-BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable)
-BPF_PROG_TYPE(BPF_PROG_TYPE_TRACING, tracing)
+BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe,
+ bpf_user_pt_regs_t, struct pt_regs)
+BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint,
+ __u64, u64)
+BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event,
+ struct bpf_perf_event_data, struct bpf_perf_event_data_kern)
+BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint,
+ struct bpf_raw_tracepoint_args, u64)
+BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable,
+ struct bpf_raw_tracepoint_args, u64)
+BPF_PROG_TYPE(BPF_PROG_TYPE_TRACING, tracing,
+ void *, void *)
#endif
#ifdef CONFIG_CGROUP_BPF
-BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
-BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SYSCTL, cg_sysctl)
-BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCKOPT, cg_sockopt)
+BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev,
+ struct bpf_cgroup_dev_ctx, struct bpf_cgroup_dev_ctx)
+BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SYSCTL, cg_sysctl,
+ struct bpf_sysctl, struct bpf_sysctl_kern)
+BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCKOPT, cg_sockopt,
+ struct bpf_sockopt, struct bpf_sockopt_kern)
#endif
#ifdef CONFIG_BPF_LIRC_MODE2
-BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
+BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2,
+ __u32, u32)
#endif
#ifdef CONFIG_INET
-BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport)
+BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport,
+ struct sk_reuseport_md, struct sk_reuseport_kern)
#endif
BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
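
To illustrate the extra macro arguments (an expansion sketch, not part of the patch): inside the bpf_ctx_convert struct added to kernel/bpf/btf.c below, every BPF_PROG_TYPE() entry contributes an adjacent (prog ctx, kernel ctx) member pair, e.g.:

	struct bpf_ctx_convert {
		/* ... */
		struct __sk_buff BPF_PROG_TYPE_SOCKET_FILTER_prog;
		struct sk_buff	 BPF_PROG_TYPE_SOCKET_FILTER_kern;
		struct xdp_md	 BPF_PROG_TYPE_XDP_prog;
		struct xdp_buff	 BPF_PROG_TYPE_XDP_kern;
		/* ... one pair per program type ... */
	};

btf_get_prog_ctx_type() then finds the pair for a given program type by indexing btf_type_member() of this struct at bpf_ctx_convert_map[prog_type] * 2.
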
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 6e7284ea1468..cdd08bf0ec06 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -343,6 +343,7 @@ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
#define BPF_MAX_SUBPROGS 256
struct bpf_subprog_info {
+ /* 'start' has to be the first field otherwise find_subprog() won't work */
u32 start; /* insn idx of function entry point */
u32 linfo_idx; /* The idx to the main_prog->aux->linfo */
u16 stack_depth; /* max. stack depth used by this function */
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 9dee00859c5f..79d4abc2556a 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -88,6 +88,7 @@ static inline bool btf_type_is_func_proto(const struct btf_type *t)
const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
const char *btf_name_by_offset(const struct btf *btf, u32 offset);
struct btf *btf_parse_vmlinux(void);
+struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog);
#else
static inline const struct btf_type *btf_type_by_id(const struct btf *btf,
u32 type_id)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index df6809a76404..4842a134b202 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -201,6 +201,8 @@ enum bpf_attach_type {
BPF_CGROUP_GETSOCKOPT,
BPF_CGROUP_SETSOCKOPT,
BPF_TRACE_RAW_TP,
+ BPF_TRACE_FENTRY,
+ BPF_TRACE_FEXIT,
__MAX_BPF_ATTACH_TYPE
};
@@ -423,6 +425,7 @@ union bpf_attr {
__aligned_u64 line_info; /* line info */
__u32 line_info_cnt; /* number of bpf_line_info records */
__u32 attach_btf_id; /* in-kernel BTF type id to attach to */
+ __u32 attach_prog_fd; /* 0 to attach to vmlinux */
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index e1d9adb212f9..3f671bf617e8 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -6,6 +6,7 @@ obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o
obj-$(CONFIG_BPF_SYSCALL) += disasm.o
+obj-$(CONFIG_BPF_JIT) += trampoline.o
obj-$(CONFIG_BPF_SYSCALL) += btf.o
ifeq ($(CONFIG_NET),y)
obj-$(CONFIG_BPF_SYSCALL) += devmap.o
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 4639c4ba9a9b..40efde5eedcb 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -2,6 +2,8 @@
/* Copyright (c) 2018 Facebook */
#include <uapi/linux/btf.h>
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/bpf_perf_event.h>
#include <uapi/linux/types.h>
#include <linux/seq_file.h>
#include <linux/compiler.h>
@@ -16,6 +18,9 @@
#include <linux/sort.h>
#include <linux/bpf_verifier.h>
#include <linux/btf.h>
+#include <linux/skmsg.h>
+#include <linux/perf_event.h>
+#include <net/sock.h>
/* BTF (BPF Type Format) is the meta data format which describes
* the data types of BPF program/map. Hence, it basically focus
@@ -3439,13 +3444,112 @@ errout:
extern char __weak _binary__btf_vmlinux_bin_start[];
extern char __weak _binary__btf_vmlinux_bin_end[];
+extern struct btf *btf_vmlinux;
+
+#define BPF_MAP_TYPE(_id, _ops)
+static union {
+ struct bpf_ctx_convert {
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
+ prog_ctx_type _id##_prog; \
+ kern_ctx_type _id##_kern;
+#include <linux/bpf_types.h>
+#undef BPF_PROG_TYPE
+ } *__t;
+ /* 't' is written once under lock. Read many times. */
+ const struct btf_type *t;
+} bpf_ctx_convert;
+enum {
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
+ __ctx_convert##_id,
+#include <linux/bpf_types.h>
+#undef BPF_PROG_TYPE
+};
+static u8 bpf_ctx_convert_map[] = {
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
+ [_id] = __ctx_convert##_id,
+#include <linux/bpf_types.h>
+#undef BPF_PROG_TYPE
+};
+#undef BPF_MAP_TYPE
+
+static const struct btf_member *
+btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf,
+ const struct btf_type *t, enum bpf_prog_type prog_type)
+{
+ const struct btf_type *conv_struct;
+ const struct btf_type *ctx_struct;
+ const struct btf_member *ctx_type;
+ const char *tname, *ctx_tname;
+
+ conv_struct = bpf_ctx_convert.t;
+ if (!conv_struct) {
+ bpf_log(log, "btf_vmlinux is malformed\n");
+ return NULL;
+ }
+ t = btf_type_by_id(btf, t->type);
+ while (btf_type_is_modifier(t))
+ t = btf_type_by_id(btf, t->type);
+ if (!btf_type_is_struct(t)) {
+ /* Only pointer to struct is supported for now.
+ * That means that BPF_PROG_TYPE_TRACEPOINT with BTF
+ * is not supported yet.
+ * BPF_PROG_TYPE_RAW_TRACEPOINT is fine.
+ */
+ bpf_log(log, "BPF program ctx type is not a struct\n");
+ return NULL;
+ }
+ tname = btf_name_by_offset(btf, t->name_off);
+ if (!tname) {
+ bpf_log(log, "BPF program ctx struct doesn't have a name\n");
+ return NULL;
+ }
+ /* prog_type is valid bpf program type. No need for bounds check. */
+ ctx_type = btf_type_member(conv_struct) + bpf_ctx_convert_map[prog_type] * 2;
+ /* ctx_struct is a pointer to prog_ctx_type in vmlinux.
+ * Like 'struct __sk_buff'
+ */
+ ctx_struct = btf_type_by_id(btf_vmlinux, ctx_type->type);
+ if (!ctx_struct)
+ /* should not happen */
+ return NULL;
+ ctx_tname = btf_name_by_offset(btf_vmlinux, ctx_struct->name_off);
+ if (!ctx_tname) {
+ /* should not happen */
+ bpf_log(log, "Please fix kernel include/linux/bpf_types.h\n");
+ return NULL;
+ }
+ /* only compare that prog's ctx type name is the same as
+ * kernel expects. No need to compare field by field.
+ * It's ok for bpf prog to do:
+ * struct __sk_buff {};
+ * int socket_filter_bpf_prog(struct __sk_buff *skb)
+ * { // no fields of skb are ever used }
+ */
+ if (strcmp(ctx_tname, tname))
+ return NULL;
+ return ctx_type;
+}
+
+static int btf_translate_to_vmlinux(struct bpf_verifier_log *log,
+ struct btf *btf,
+ const struct btf_type *t,
+ enum bpf_prog_type prog_type)
+{
+ const struct btf_member *prog_ctx_type, *kern_ctx_type;
+
+ prog_ctx_type = btf_get_prog_ctx_type(log, btf, t, prog_type);
+ if (!prog_ctx_type)
+ return -ENOENT;
+ kern_ctx_type = prog_ctx_type + 1;
+ return kern_ctx_type->type;
+}
struct btf *btf_parse_vmlinux(void)
{
struct btf_verifier_env *env = NULL;
struct bpf_verifier_log *log;
struct btf *btf = NULL;
- int err;
+ int err, i;
env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN);
if (!env)
@@ -3479,6 +3583,26 @@ struct btf *btf_parse_vmlinux(void)
if (err)
goto errout;
+ /* find struct bpf_ctx_convert for type checking later */
+ for (i = 1; i <= btf->nr_types; i++) {
+ const struct btf_type *t;
+ const char *tname;
+
+ t = btf_type_by_id(btf, i);
+ if (!__btf_type_is_struct(t))
+ continue;
+ tname = __btf_name_by_offset(btf, t->name_off);
+ if (!strcmp(tname, "bpf_ctx_convert")) {
+ /* btf_parse_vmlinux() runs under bpf_verifier_lock */
+ bpf_ctx_convert.t = t;
+ break;
+ }
+ }
+ if (i > btf->nr_types) {
+ err = -ENOENT;
+ goto errout;
+ }
+
btf_verifier_env_free(env);
refcount_set(&btf->refcnt, 1);
return btf;
@@ -3492,17 +3616,29 @@ errout:
return ERR_PTR(err);
}
-extern struct btf *btf_vmlinux;
+struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog)
+{
+ struct bpf_prog *tgt_prog = prog->aux->linked_prog;
+
+ if (tgt_prog) {
+ return tgt_prog->aux->btf;
+ } else {
+ return btf_vmlinux;
+ }
+}
bool btf_ctx_access(int off, int size, enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
const struct btf_type *t = prog->aux->attach_func_proto;
+ struct bpf_prog *tgt_prog = prog->aux->linked_prog;
+ struct btf *btf = bpf_prog_get_target_btf(prog);
const char *tname = prog->aux->attach_func_name;
struct bpf_verifier_log *log = info->log;
const struct btf_param *args;
u32 nr_args, arg;
+ int ret;
if (off % 8) {
bpf_log(log, "func '%s' offset %d is not multiple of 8\n",
@@ -3511,22 +3647,34 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
}
arg = off / 8;
args = (const struct btf_param *)(t + 1);
- nr_args = btf_type_vlen(t);
+ /* if (t == NULL) Fall back to default BPF prog with 5 u64 arguments */
+ nr_args = t ? btf_type_vlen(t) : 5;
if (prog->aux->attach_btf_trace) {
/* skip first 'void *__data' argument in btf_trace_##name typedef */
args++;
nr_args--;
}
- if (arg >= nr_args) {
+
+ if (prog->expected_attach_type == BPF_TRACE_FEXIT &&
+ arg == nr_args) {
+ if (!t)
+ /* Default prog with 5 args. 6th arg is retval. */
+ return true;
+ /* function return type */
+ t = btf_type_by_id(btf, t->type);
+ } else if (arg >= nr_args) {
bpf_log(log, "func '%s' doesn't have %d-th argument\n",
- tname, arg);
+ tname, arg + 1);
return false;
+ } else {
+ if (!t)
+ /* Default prog with 5 args */
+ return true;
+ t = btf_type_by_id(btf, args[arg].type);
}
-
- t = btf_type_by_id(btf_vmlinux, args[arg].type);
/* skip modifiers */
while (btf_type_is_modifier(t))
- t = btf_type_by_id(btf_vmlinux, t->type);
+ t = btf_type_by_id(btf, t->type);
if (btf_type_is_int(t))
/* accessing a scalar */
return true;
@@ -3534,7 +3682,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
bpf_log(log,
"func '%s' arg%d '%s' has type %s. Only pointer access is allowed\n",
tname, arg,
- __btf_name_by_offset(btf_vmlinux, t->name_off),
+ __btf_name_by_offset(btf, t->name_off),
btf_kind_str[BTF_INFO_KIND(t->info)]);
return false;
}
@@ -3549,10 +3697,19 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
info->reg_type = PTR_TO_BTF_ID;
info->btf_id = t->type;
- t = btf_type_by_id(btf_vmlinux, t->type);
+ if (tgt_prog) {
+ ret = btf_translate_to_vmlinux(log, btf, t, tgt_prog->type);
+ if (ret > 0) {
+ info->btf_id = ret;
+ return true;
+ } else {
+ return false;
+ }
+ }
+ t = btf_type_by_id(btf, t->type);
/* skip modifiers */
while (btf_type_is_modifier(t))
- t = btf_type_by_id(btf_vmlinux, t->type);
+ t = btf_type_by_id(btf, t->type);
if (!btf_type_is_struct(t)) {
bpf_log(log,
"func '%s' arg%d type %s is not a struct\n",
@@ -3561,7 +3718,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
}
bpf_log(log, "func '%s' arg%d has btf_id %d type %s '%s'\n",
tname, arg, info->btf_id, btf_kind_str[BTF_INFO_KIND(t->info)],
- __btf_name_by_offset(btf_vmlinux, t->name_off));
+ __btf_name_by_offset(btf, t->name_off));
return true;
}
@@ -3716,7 +3873,8 @@ again:
return -EINVAL;
}
-u32 btf_resolve_helper_id(struct bpf_verifier_log *log, void *fn, int arg)
+static int __btf_resolve_helper_id(struct bpf_verifier_log *log, void *fn,
+ int arg)
{
char fnname[KSYM_SYMBOL_LEN + 4] = "btf_";
const struct btf_param *args;
@@ -3784,6 +3942,185 @@ u32 btf_resolve_helper_id(struct bpf_verifier_log *log, void *fn, int arg)
return btf_id;
}
+int btf_resolve_helper_id(struct bpf_verifier_log *log,
+ const struct bpf_func_proto *fn, int arg)
+{
+ int *btf_id = &fn->btf_id[arg];
+ int ret;
+
+ if (fn->arg_type[arg] != ARG_PTR_TO_BTF_ID)
+ return -EINVAL;
+
+ ret = READ_ONCE(*btf_id);
+ if (ret)
+ return ret;
+ /* ok to race the search. The result is the same */
+ ret = __btf_resolve_helper_id(log, fn->func, arg);
+ if (!ret) {
+ /* Function argument cannot be type 'void' */
+ bpf_log(log, "BTF resolution bug\n");
+ return -EFAULT;
+ }
+ WRITE_ONCE(*btf_id, ret);
+ return ret;
+}
+
+static int __get_type_size(struct btf *btf, u32 btf_id,
+ const struct btf_type **bad_type)
+{
+ const struct btf_type *t;
+
+ if (!btf_id)
+ /* void */
+ return 0;
+ t = btf_type_by_id(btf, btf_id);
+ while (t && btf_type_is_modifier(t))
+ t = btf_type_by_id(btf, t->type);
+ if (!t)
+ return -EINVAL;
+ if (btf_type_is_ptr(t))
+ /* kernel size of pointer. Not BPF's size of pointer */
+ return sizeof(void *);
+ if (btf_type_is_int(t) || btf_type_is_enum(t))
+ return t->size;
+ *bad_type = t;
+ return -EINVAL;
+}
+
+int btf_distill_func_proto(struct bpf_verifier_log *log,
+ struct btf *btf,
+ const struct btf_type *func,
+ const char *tname,
+ struct btf_func_model *m)
+{
+ const struct btf_param *args;
+ const struct btf_type *t;
+ u32 i, nargs;
+ int ret;
+
+ if (!func) {
+ /* BTF function prototype doesn't match the verifier types.
+ * Fall back to 5 u64 args.
+ */
+ for (i = 0; i < 5; i++)
+ m->arg_size[i] = 8;
+ m->ret_size = 8;
+ m->nr_args = 5;
+ return 0;
+ }
+ args = (const struct btf_param *)(func + 1);
+ nargs = btf_type_vlen(func);
+ if (nargs >= MAX_BPF_FUNC_ARGS) {
+ bpf_log(log,
+ "The function %s has %d arguments. Too many.\n",
+ tname, nargs);
+ return -EINVAL;
+ }
+ ret = __get_type_size(btf, func->type, &t);
+ if (ret < 0) {
+ bpf_log(log,
+ "The function %s return type %s is unsupported.\n",
+ tname, btf_kind_str[BTF_INFO_KIND(t->info)]);
+ return -EINVAL;
+ }
+ m->ret_size = ret;
+
+ for (i = 0; i < nargs; i++) {
+ ret = __get_type_size(btf, args[i].type, &t);
+ if (ret < 0) {
+ bpf_log(log,
+ "The function %s arg%d type %s is unsupported.\n",
+ tname, i, btf_kind_str[BTF_INFO_KIND(t->info)]);
+ return -EINVAL;
+ }
+ m->arg_size[i] = ret;
+ }
+ m->nr_args = nargs;
+ return 0;
+}
+
+int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog)
+{
+ struct bpf_verifier_state *st = env->cur_state;
+ struct bpf_func_state *func = st->frame[st->curframe];
+ struct bpf_reg_state *reg = func->regs;
+ struct bpf_verifier_log *log = &env->log;
+ struct bpf_prog *prog = env->prog;
+ struct btf *btf = prog->aux->btf;
+ const struct btf_param *args;
+ const struct btf_type *t;
+ u32 i, nargs, btf_id;
+ const char *tname;
+
+ if (!prog->aux->func_info)
+ return 0;
+
+ btf_id = prog->aux->func_info[subprog].type_id;
+ if (!btf_id)
+ return 0;
+
+ if (prog->aux->func_info_aux[subprog].unreliable)
+ return 0;
+
+ t = btf_type_by_id(btf, btf_id);
+ if (!t || !btf_type_is_func(t)) {
+ bpf_log(log, "BTF of subprog %d doesn't point to KIND_FUNC\n",
+ subprog);
+ return -EINVAL;
+ }
+ tname = btf_name_by_offset(btf, t->name_off);
+
+ t = btf_type_by_id(btf, t->type);
+ if (!t || !btf_type_is_func_proto(t)) {
+ bpf_log(log, "Invalid type of func %s\n", tname);
+ return -EINVAL;
+ }
+ args = (const struct btf_param *)(t + 1);
+ nargs = btf_type_vlen(t);
+ if (nargs > 5) {
+ bpf_log(log, "Function %s has %d > 5 args\n", tname, nargs);
+ goto out;
+ }
+ /* check that BTF function arguments match actual types that the
+ * verifier sees.
+ */
+ for (i = 0; i < nargs; i++) {
+ t = btf_type_by_id(btf, args[i].type);
+ while (btf_type_is_modifier(t))
+ t = btf_type_by_id(btf, t->type);
+ if (btf_type_is_int(t) || btf_type_is_enum(t)) {
+ if (reg[i + 1].type == SCALAR_VALUE)
+ continue;
+ bpf_log(log, "R%d is not a scalar\n", i + 1);
+ goto out;
+ }
+ if (btf_type_is_ptr(t)) {
+ if (reg[i + 1].type == SCALAR_VALUE) {
+ bpf_log(log, "R%d is not a pointer\n", i + 1);
+ goto out;
+ }
+ /* If program is passing PTR_TO_CTX into subprogram
+ * check that BTF type matches.
+ */
+ if (reg[i + 1].type == PTR_TO_CTX &&
+ !btf_get_prog_ctx_type(log, btf, t, prog->type))
+ goto out;
+ /* All other pointers are ok */
+ continue;
+ }
+ bpf_log(log, "Unrecognized argument type %s\n",
+ btf_kind_str[BTF_INFO_KIND(t->info)]);
+ goto out;
+ }
+ return 0;
+out:
+ /* LLVM optimizations can remove arguments from static functions. */
+ bpf_log(log,
+ "Type info disagrees with actual arguments due to compiler optimizations\n");
+ prog->aux->func_info_aux[subprog].unreliable = true;
+ return 0;
+}
+
void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
struct seq_file *m)
{
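
A sketch (identifiers are hypothetical, not from the patch) of the kind of BPF-to-BPF call that btf_check_func_arg_match() validates: BTF records the static subprogram's argument types, and the verifier compares them with the register types seen at the call site; when the compiler has optimized an argument away, func_info_aux marks the prototype unreliable instead of rejecting the program.

	#include <linux/bpf.h>
	#include "bpf_helpers.h"

	static __attribute__((noinline))
	int check_pkt(struct __sk_buff *skb, int flags)
	{
		/* BTF: arg0 is a pointer -> the verifier expects PTR_TO_CTX whose
		 * type matches struct __sk_buff for this program type;
		 * arg1 is an int -> the verifier expects SCALAR_VALUE in R2.
		 */
		return skb->len > 0 && flags >= 0;
	}

	SEC("classifier")
	int classify(struct __sk_buff *skb)
	{
		return check_pkt(skb, 0);
	}

	char _license[] SEC("license") = "GPL";
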
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 99693f3c4e99..b5945c3aaa8e 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2015,6 +2015,7 @@ static void bpf_prog_free_deferred(struct work_struct *work)
if (aux->prog->has_callchain_buf)
put_callchain_buffers();
#endif
+ bpf_trampoline_put(aux->trampoline);
for (i = 0; i < aux->func_cnt; i++)
bpf_jit_free(aux->func[i]);
if (aux->func_cnt) {
@@ -2030,6 +2031,8 @@ void bpf_prog_free(struct bpf_prog *fp)
{
struct bpf_prog_aux *aux = fp->aux;
+ if (aux->linked_prog)
+ bpf_prog_put(aux->linked_prog);
INIT_WORK(&aux->work, bpf_prog_free_deferred);
schedule_work(&aux->work);
}
@@ -2144,6 +2147,12 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
return -EFAULT;
}
+int __weak bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
+ void *addr1, void *addr2)
+{
+ return -ENOTSUPP;
+}
+
DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
EXPORT_SYMBOL(bpf_stats_enabled_key);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 6d9ce95e5a8d..c88c815c2154 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -43,7 +43,7 @@ static DEFINE_SPINLOCK(map_idr_lock);
int sysctl_unprivileged_bpf_disabled __read_mostly;
static const struct bpf_map_ops * const bpf_map_types[] = {
-#define BPF_PROG_TYPE(_id, _ops)
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
#define BPF_MAP_TYPE(_id, _ops) \
[_id] = &_ops,
#include <linux/bpf_types.h>
@@ -1189,7 +1189,7 @@ err_put:
}
static const struct bpf_prog_ops * const bpf_prog_types[] = {
-#define BPF_PROG_TYPE(_id, _name) \
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
[_id] = & _name ## _prog_ops,
#define BPF_MAP_TYPE(_id, _ops)
#include <linux/bpf_types.h>
@@ -1328,6 +1328,7 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu)
struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
kvfree(aux->func_info);
+ kfree(aux->func_info_aux);
free_used_maps(aux);
bpf_prog_uncharge_memlock(aux->prog);
security_bpf_prog_free(aux);
@@ -1576,7 +1577,7 @@ static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr)
static int
bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
enum bpf_attach_type expected_attach_type,
- u32 btf_id)
+ u32 btf_id, u32 prog_fd)
{
switch (prog_type) {
case BPF_PROG_TYPE_TRACING:
@@ -1584,7 +1585,7 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
return -EINVAL;
break;
default:
- if (btf_id)
+ if (btf_id || prog_fd)
return -EINVAL;
break;
}
@@ -1635,7 +1636,7 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
}
/* last field in 'union bpf_attr' used by this command */
-#define BPF_PROG_LOAD_LAST_FIELD attach_btf_id
+#define BPF_PROG_LOAD_LAST_FIELD attach_prog_fd
static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
{
@@ -1678,7 +1679,8 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
bpf_prog_load_fixup_attach_type(attr);
if (bpf_prog_load_check_attach(type, attr->expected_attach_type,
- attr->attach_btf_id))
+ attr->attach_btf_id,
+ attr->attach_prog_fd))
return -EINVAL;
/* plain bpf_prog allocation */
@@ -1688,6 +1690,16 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
prog->expected_attach_type = attr->expected_attach_type;
prog->aux->attach_btf_id = attr->attach_btf_id;
+ if (attr->attach_prog_fd) {
+ struct bpf_prog *tgt_prog;
+
+ tgt_prog = bpf_prog_get(attr->attach_prog_fd);
+ if (IS_ERR(tgt_prog)) {
+ err = PTR_ERR(tgt_prog);
+ goto free_prog_nouncharge;
+ }
+ prog->aux->linked_prog = tgt_prog;
+ }
prog->aux->offload_requested = !!attr->prog_ifindex;
@@ -1799,6 +1811,49 @@ static int bpf_obj_get(const union bpf_attr *attr)
attr->file_flags);
}
+static int bpf_tracing_prog_release(struct inode *inode, struct file *filp)
+{
+ struct bpf_prog *prog = filp->private_data;
+
+ WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog));
+ bpf_prog_put(prog);
+ return 0;
+}
+
+static const struct file_operations bpf_tracing_prog_fops = {
+ .release = bpf_tracing_prog_release,
+ .read = bpf_dummy_read,
+ .write = bpf_dummy_write,
+};
+
+static int bpf_tracing_prog_attach(struct bpf_prog *prog)
+{
+ int tr_fd, err;
+
+ if (prog->expected_attach_type != BPF_TRACE_FENTRY &&
+ prog->expected_attach_type != BPF_TRACE_FEXIT) {
+ err = -EINVAL;
+ goto out_put_prog;
+ }
+
+ err = bpf_trampoline_link_prog(prog);
+ if (err)
+ goto out_put_prog;
+
+ tr_fd = anon_inode_getfd("bpf-tracing-prog", &bpf_tracing_prog_fops,
+ prog, O_CLOEXEC);
+ if (tr_fd < 0) {
+ WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog));
+ err = tr_fd;
+ goto out_put_prog;
+ }
+ return tr_fd;
+
+out_put_prog:
+ bpf_prog_put(prog);
+ return err;
+}
+
struct bpf_raw_tracepoint {
struct bpf_raw_event_map *btp;
struct bpf_prog *prog;
@@ -1850,14 +1905,16 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
if (prog->type == BPF_PROG_TYPE_TRACING) {
if (attr->raw_tracepoint.name) {
- /* raw_tp name should not be specified in raw_tp
- * programs that were verified via in-kernel BTF info
+ /* The attach point for this category of programs
+ * should be specified via btf_id during program load.
*/
err = -EINVAL;
goto out_put_prog;
}
- /* raw_tp name is taken from type name instead */
- tp_name = prog->aux->attach_func_name;
+ if (prog->expected_attach_type == BPF_TRACE_RAW_TP)
+ tp_name = prog->aux->attach_func_name;
+ else
+ return bpf_tracing_prog_attach(prog);
} else {
if (strncpy_from_user(buf,
u64_to_user_ptr(attr->raw_tracepoint.name),
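
For reference, a minimal user-space sketch (not part of the patch) of the flow that bpf_tracing_prog_attach() serves. libbpf wraps all of this; here the instructions and the target function's BTF id are assumed to be prepared by the caller:

	#include <string.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/bpf.h>

	static int attach_fexit(const struct bpf_insn *insns, __u32 insn_cnt,
				__u32 target_btf_id)
	{
		union bpf_attr attr = {};
		int prog_fd;

		attr.prog_type = BPF_PROG_TYPE_TRACING;
		attr.expected_attach_type = BPF_TRACE_FEXIT;
		attr.attach_btf_id = target_btf_id;	/* e.g. id of eth_type_trans */
		attr.attach_prog_fd = 0;		/* 0 == attach to vmlinux */
		attr.insns = (__u64)(unsigned long)insns;
		attr.insn_cnt = insn_cnt;
		attr.license = (__u64)(unsigned long)"GPL";
		prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
		if (prog_fd < 0)
			return -1;

		memset(&attr, 0, sizeof(attr));
		attr.raw_tracepoint.prog_fd = prog_fd;
		/* name stays 0: the target comes from attach_btf_id at load time */
		return syscall(__NR_bpf, BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
	}

The returned fd pins the attachment; closing it goes through bpf_tracing_prog_release() and unlinks the program from the trampoline.
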
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
new file mode 100644
index 000000000000..10ae59d65f13
--- /dev/null
+++ b/kernel/bpf/trampoline.c
@@ -0,0 +1,253 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2019 Facebook */
+#include <linux/hash.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+
+/* btf_vmlinux has ~22k attachable functions. 1k htab is enough. */
+#define TRAMPOLINE_HASH_BITS 10
+#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)
+
+static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];
+
+/* serializes access to trampoline_table */
+static DEFINE_MUTEX(trampoline_mutex);
+
+struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
+{
+ struct bpf_trampoline *tr;
+ struct hlist_head *head;
+ void *image;
+ int i;
+
+ mutex_lock(&trampoline_mutex);
+ head = &trampoline_table[hash_64(key, TRAMPOLINE_HASH_BITS)];
+ hlist_for_each_entry(tr, head, hlist) {
+ if (tr->key == key) {
+ refcount_inc(&tr->refcnt);
+ goto out;
+ }
+ }
+ tr = kzalloc(sizeof(*tr), GFP_KERNEL);
+ if (!tr)
+ goto out;
+
+ /* is_root was checked earlier. No need for bpf_jit_charge_modmem() */
+ image = bpf_jit_alloc_exec(PAGE_SIZE);
+ if (!image) {
+ kfree(tr);
+ tr = NULL;
+ goto out;
+ }
+
+ tr->key = key;
+ INIT_HLIST_NODE(&tr->hlist);
+ hlist_add_head(&tr->hlist, head);
+ refcount_set(&tr->refcnt, 1);
+ mutex_init(&tr->mutex);
+ for (i = 0; i < BPF_TRAMP_MAX; i++)
+ INIT_HLIST_HEAD(&tr->progs_hlist[i]);
+
+ set_vm_flush_reset_perms(image);
+ /* Keep image as writeable. The alternative is to keep flipping ro/rw
+ * every time a new program is attached or detached.
+ */
+ set_memory_x((long)image, 1);
+ tr->image = image;
+out:
+ mutex_unlock(&trampoline_mutex);
+ return tr;
+}
+
+/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
+ * bytes on x86. Pick a number to fit into PAGE_SIZE / 2
+ */
+#define BPF_MAX_TRAMP_PROGS 40
+
+static int bpf_trampoline_update(struct bpf_trampoline *tr)
+{
+ void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2;
+ void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2;
+ struct bpf_prog *progs_to_run[BPF_MAX_TRAMP_PROGS];
+ int fentry_cnt = tr->progs_cnt[BPF_TRAMP_FENTRY];
+ int fexit_cnt = tr->progs_cnt[BPF_TRAMP_FEXIT];
+ struct bpf_prog **progs, **fentry, **fexit;
+ u32 flags = BPF_TRAMP_F_RESTORE_REGS;
+ struct bpf_prog_aux *aux;
+ int err;
+
+ if (fentry_cnt + fexit_cnt == 0) {
+ err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_CALL_TO_NOP,
+ old_image, NULL);
+ tr->selector = 0;
+ goto out;
+ }
+
+ /* populate fentry progs */
+ fentry = progs = progs_to_run;
+ hlist_for_each_entry(aux, &tr->progs_hlist[BPF_TRAMP_FENTRY], tramp_hlist)
+ *progs++ = aux->prog;
+
+ /* populate fexit progs */
+ fexit = progs;
+ hlist_for_each_entry(aux, &tr->progs_hlist[BPF_TRAMP_FEXIT], tramp_hlist)
+ *progs++ = aux->prog;
+
+ if (fexit_cnt)
+ flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;
+
+ err = arch_prepare_bpf_trampoline(new_image, &tr->func.model, flags,
+ fentry, fentry_cnt,
+ fexit, fexit_cnt,
+ tr->func.addr);
+ if (err)
+ goto out;
+
+ if (tr->selector)
+ /* progs already running at this address */
+ err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_CALL_TO_CALL,
+ old_image, new_image);
+ else
+ /* first time registering */
+ err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_NOP_TO_CALL,
+ NULL, new_image);
+ if (err)
+ goto out;
+ tr->selector++;
+out:
+ return err;
+}
+
+static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(enum bpf_attach_type t)
+{
+ switch (t) {
+ case BPF_TRACE_FENTRY:
+ return BPF_TRAMP_FENTRY;
+ default:
+ return BPF_TRAMP_FEXIT;
+ }
+}
+
+int bpf_trampoline_link_prog(struct bpf_prog *prog)
+{
+ enum bpf_tramp_prog_type kind;
+ struct bpf_trampoline *tr;
+ int err = 0;
+
+ tr = prog->aux->trampoline;
+ kind = bpf_attach_type_to_tramp(prog->expected_attach_type);
+ mutex_lock(&tr->mutex);
+ if (tr->progs_cnt[BPF_TRAMP_FENTRY] + tr->progs_cnt[BPF_TRAMP_FEXIT]
+ >= BPF_MAX_TRAMP_PROGS) {
+ err = -E2BIG;
+ goto out;
+ }
+ if (!hlist_unhashed(&prog->aux->tramp_hlist)) {
+ /* prog already linked */
+ err = -EBUSY;
+ goto out;
+ }
+ hlist_add_head(&prog->aux->tramp_hlist, &tr->progs_hlist[kind]);
+ tr->progs_cnt[kind]++;
+ err = bpf_trampoline_update(prog->aux->trampoline);
+ if (err) {
+ hlist_del(&prog->aux->tramp_hlist);
+ tr->progs_cnt[kind]--;
+ }
+out:
+ mutex_unlock(&tr->mutex);
+ return err;
+}
+
+/* bpf_trampoline_unlink_prog() should never fail. */
+int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
+{
+ enum bpf_tramp_prog_type kind;
+ struct bpf_trampoline *tr;
+ int err;
+
+ tr = prog->aux->trampoline;
+ kind = bpf_attach_type_to_tramp(prog->expected_attach_type);
+ mutex_lock(&tr->mutex);
+ hlist_del(&prog->aux->tramp_hlist);
+ tr->progs_cnt[kind]--;
+ err = bpf_trampoline_update(prog->aux->trampoline);
+ mutex_unlock(&tr->mutex);
+ return err;
+}
+
+void bpf_trampoline_put(struct bpf_trampoline *tr)
+{
+ if (!tr)
+ return;
+ mutex_lock(&trampoline_mutex);
+ if (!refcount_dec_and_test(&tr->refcnt))
+ goto out;
+ WARN_ON_ONCE(mutex_is_locked(&tr->mutex));
+ if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FENTRY])))
+ goto out;
+ if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
+ goto out;
+ bpf_jit_free_exec(tr->image);
+ hlist_del(&tr->hlist);
+ kfree(tr);
+out:
+ mutex_unlock(&trampoline_mutex);
+}
+
+/* The logic is similar to BPF_PROG_RUN, but with explicit rcu and preempt that
+ * are needed for the trampoline. The macro is split into
+ * call __bpf_prog_enter
+ * call prog->bpf_func
+ * call __bpf_prog_exit
+ */
+u64 notrace __bpf_prog_enter(void)
+{
+ u64 start = 0;
+
+ rcu_read_lock();
+ preempt_disable();
+ if (static_branch_unlikely(&bpf_stats_enabled_key))
+ start = sched_clock();
+ return start;
+}
+
+void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
+{
+ struct bpf_prog_stats *stats;
+
+ if (static_branch_unlikely(&bpf_stats_enabled_key) &&
+ /* static_key could be enabled in __bpf_prog_enter
+ * and disabled in __bpf_prog_exit.
+ * And vice versa.
+ * Hence check that 'start' is not zero.
+ */
+ start) {
+ stats = this_cpu_ptr(prog->aux->stats);
+ u64_stats_update_begin(&stats->syncp);
+ stats->cnt++;
+ stats->nsecs += sched_clock() - start;
+ u64_stats_update_end(&stats->syncp);
+ }
+ preempt_enable();
+ rcu_read_unlock();
+}
+
+int __weak
+arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags,
+ struct bpf_prog **fentry_progs, int fentry_cnt,
+ struct bpf_prog **fexit_progs, int fexit_cnt,
+ void *orig_call)
+{
+ return -ENOTSUPP;
+}
+
+static int __init init_trampolines(void)
+{
+ int i;
+
+ for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++)
+ INIT_HLIST_HEAD(&trampoline_table[i]);
+ return 0;
+}
+late_initcall(init_trampolines);
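
A small sketch of the double-buffered image scheme used by bpf_trampoline_update() above (helper names are hypothetical): the trampoline page is split in two, the half that is not currently live is always the build target, and tr->selector flips between the halves on every successful update, so a new generation can be installed with a single text poke while the old one may still be executing.

	/* With PAGE_SIZE == 4096 and tr->image == I:
	 *   selector 0 (first attach): build at I;        nothing is live yet
	 *   selector 1 (next update):  build at I + 2048; I is live
	 *   selector 2:                build at I again;  I + 2048 is live
	 */
	static void *build_half(void *image, u64 selector)
	{
		return image + (selector & 1) * (PAGE_SIZE / 2);
	}

	static void *prev_half(void *image, u64 selector)
	{
		return image + ((selector + 1) & 1) * (PAGE_SIZE / 2);
	}
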
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 2f2374967b36..e9dc95a18d44 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -23,7 +23,7 @@
#include "disasm.h"
static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
-#define BPF_PROG_TYPE(_id, _name) \
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
[_id] = & _name ## _verifier_ops,
#define BPF_MAP_TYPE(_id, _ops)
#include <linux/bpf_types.h>
@@ -3970,6 +3970,9 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
/* only increment it after check_reg_arg() finished */
state->curframe++;
+ if (btf_check_func_arg_match(env, subprog))
+ return -EINVAL;
+
/* and go analyze first insn of the callee */
*insn_idx = target_insn;
@@ -4147,11 +4150,9 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
meta.func_id = func_id;
/* check args */
for (i = 0; i < 5; i++) {
- if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID) {
- if (!fn->btf_id[i])
- fn->btf_id[i] = btf_resolve_helper_id(&env->log, fn->func, i);
- meta.btf_id = fn->btf_id[i];
- }
+ err = btf_resolve_helper_id(&env->log, fn, i);
+ if (err > 0)
+ meta.btf_id = err;
err = check_func_arg(env, BPF_REG_1 + i, fn->arg_type[i], &meta);
if (err)
return err;
@@ -6566,6 +6567,7 @@ static int check_btf_func(struct bpf_verifier_env *env,
u32 i, nfuncs, urec_size, min_size;
u32 krec_size = sizeof(struct bpf_func_info);
struct bpf_func_info *krecord;
+ struct bpf_func_info_aux *info_aux = NULL;
const struct btf_type *type;
struct bpf_prog *prog;
const struct btf *btf;
@@ -6599,6 +6601,9 @@ static int check_btf_func(struct bpf_verifier_env *env,
krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
if (!krecord)
return -ENOMEM;
+ info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
+ if (!info_aux)
+ goto err_free;
for (i = 0; i < nfuncs; i++) {
ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
@@ -6650,29 +6655,31 @@ static int check_btf_func(struct bpf_verifier_env *env,
ret = -EINVAL;
goto err_free;
}
-
prev_offset = krecord[i].insn_off;
urecord += urec_size;
}
prog->aux->func_info = krecord;
prog->aux->func_info_cnt = nfuncs;
+ prog->aux->func_info_aux = info_aux;
return 0;
err_free:
kvfree(krecord);
+ kfree(info_aux);
return ret;
}
static void adjust_btf_func(struct bpf_verifier_env *env)
{
+ struct bpf_prog_aux *aux = env->prog->aux;
int i;
- if (!env->prog->aux->func_info)
+ if (!aux->func_info)
return;
for (i = 0; i < env->subprog_cnt; i++)
- env->prog->aux->func_info[i].insn_off = env->subprog_info[i].start;
+ aux->func_info[i].insn_off = env->subprog_info[i].start;
}
#define MIN_BPF_LINEINFO_SIZE (offsetof(struct bpf_line_info, line_col) + \
@@ -7653,6 +7660,9 @@ static int do_check(struct bpf_verifier_env *env)
0 /* frameno */,
0 /* subprogno, zero == main subprog */);
+ if (btf_check_func_arg_match(env, 0))
+ return -EINVAL;
+
for (;;) {
struct bpf_insn *insn;
u8 class;
@@ -9380,10 +9390,17 @@ static void print_verification_stats(struct bpf_verifier_env *env)
static int check_attach_btf_id(struct bpf_verifier_env *env)
{
struct bpf_prog *prog = env->prog;
+ struct bpf_prog *tgt_prog = prog->aux->linked_prog;
u32 btf_id = prog->aux->attach_btf_id;
const char prefix[] = "btf_trace_";
+ int ret = 0, subprog = -1, i;
+ struct bpf_trampoline *tr;
const struct btf_type *t;
+ bool conservative = true;
const char *tname;
+ struct btf *btf;
+ long addr;
+ u64 key;
if (prog->type != BPF_PROG_TYPE_TRACING)
return 0;
@@ -9392,19 +9409,47 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
verbose(env, "Tracing programs must provide btf_id\n");
return -EINVAL;
}
- t = btf_type_by_id(btf_vmlinux, btf_id);
+ btf = bpf_prog_get_target_btf(prog);
+ if (!btf) {
+ verbose(env,
+ "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
+ return -EINVAL;
+ }
+ t = btf_type_by_id(btf, btf_id);
if (!t) {
verbose(env, "attach_btf_id %u is invalid\n", btf_id);
return -EINVAL;
}
- tname = btf_name_by_offset(btf_vmlinux, t->name_off);
+ tname = btf_name_by_offset(btf, t->name_off);
if (!tname) {
verbose(env, "attach_btf_id %u doesn't have a name\n", btf_id);
return -EINVAL;
}
+ if (tgt_prog) {
+ struct bpf_prog_aux *aux = tgt_prog->aux;
+
+ for (i = 0; i < aux->func_info_cnt; i++)
+ if (aux->func_info[i].type_id == btf_id) {
+ subprog = i;
+ break;
+ }
+ if (subprog == -1) {
+ verbose(env, "Subprog %s doesn't exist\n", tname);
+ return -EINVAL;
+ }
+ conservative = aux->func_info_aux[subprog].unreliable;
+ key = ((u64)aux->id) << 32 | btf_id;
+ } else {
+ key = btf_id;
+ }
switch (prog->expected_attach_type) {
case BPF_TRACE_RAW_TP:
+ if (tgt_prog) {
+ verbose(env,
+ "Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
+ return -EINVAL;
+ }
if (!btf_type_is_typedef(t)) {
verbose(env, "attach_btf_id %u is not a typedef\n",
btf_id);
@@ -9416,11 +9461,11 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
return -EINVAL;
}
tname += sizeof(prefix) - 1;
- t = btf_type_by_id(btf_vmlinux, t->type);
+ t = btf_type_by_id(btf, t->type);
if (!btf_type_is_ptr(t))
/* should never happen in valid vmlinux build */
return -EINVAL;
- t = btf_type_by_id(btf_vmlinux, t->type);
+ t = btf_type_by_id(btf, t->type);
if (!btf_type_is_func_proto(t))
/* should never happen in valid vmlinux build */
return -EINVAL;
@@ -9432,6 +9477,66 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
prog->aux->attach_func_proto = t;
prog->aux->attach_btf_trace = true;
return 0;
+ case BPF_TRACE_FENTRY:
+ case BPF_TRACE_FEXIT:
+ if (!btf_type_is_func(t)) {
+ verbose(env, "attach_btf_id %u is not a function\n",
+ btf_id);
+ return -EINVAL;
+ }
+ t = btf_type_by_id(btf, t->type);
+ if (!btf_type_is_func_proto(t))
+ return -EINVAL;
+ tr = bpf_trampoline_lookup(key);
+ if (!tr)
+ return -ENOMEM;
+ prog->aux->attach_func_name = tname;
+ /* t is either vmlinux type or another program's type */
+ prog->aux->attach_func_proto = t;
+ mutex_lock(&tr->mutex);
+ if (tr->func.addr) {
+ prog->aux->trampoline = tr;
+ goto out;
+ }
+ if (tgt_prog && conservative) {
+ prog->aux->attach_func_proto = NULL;
+ t = NULL;
+ }
+ ret = btf_distill_func_proto(&env->log, btf, t,
+ tname, &tr->func.model);
+ if (ret < 0)
+ goto out;
+ if (tgt_prog) {
+ if (!tgt_prog->jited) {
+ /* for now */
+ verbose(env, "Can trace only JITed BPF progs\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ if (tgt_prog->type == BPF_PROG_TYPE_TRACING) {
+ /* prevent cycles */
+ verbose(env, "Cannot recursively attach\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
+ } else {
+ addr = kallsyms_lookup_name(tname);
+ if (!addr) {
+ verbose(env,
+ "The address of function %s cannot be found\n",
+ tname);
+ ret = -ENOENT;
+ goto out;
+ }
+ }
+ tr->func.addr = (void *)addr;
+ prog->aux->trampoline = tr;
+out:
+ mutex_unlock(&tr->mutex);
+ if (ret)
+ bpf_trampoline_put(tr);
+ return ret;
default:
return -EINVAL;
}
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 0be4497cb832..62933279fbba 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -105,6 +105,40 @@ out:
return err;
}
+/* Integer types of various sizes and pointer combinations cover a variety of
+ * architecture-dependent calling conventions. Functions with 7+ arguments can
+ * be supported in the future.
+ */
+int noinline bpf_fentry_test1(int a)
+{
+ return a + 1;
+}
+
+int noinline bpf_fentry_test2(int a, u64 b)
+{
+ return a + b;
+}
+
+int noinline bpf_fentry_test3(char a, int b, u64 c)
+{
+ return a + b + c;
+}
+
+int noinline bpf_fentry_test4(void *a, char b, int c, u64 d)
+{
+ return (long)a + b + c + d;
+}
+
+int noinline bpf_fentry_test5(u64 a, void *b, short c, int d, u64 e)
+{
+ return a + (long)b + c + d + e;
+}
+
+int noinline bpf_fentry_test6(u64 a, void *b, short c, int d, void *e, u64 f)
+{
+ return a + (long)b + c + d + (long)e + f;
+}
+
static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
u32 headroom, u32 tailroom)
{
@@ -122,6 +156,13 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
kfree(data);
return ERR_PTR(-EFAULT);
}
+ if (bpf_fentry_test1(1) != 2 ||
+ bpf_fentry_test2(2, 3) != 5 ||
+ bpf_fentry_test3(4, 5, 6) != 15 ||
+ bpf_fentry_test4((void *)7, 8, 9, 10) != 34 ||
+ bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 ||
+ bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111)
+ return ERR_PTR(-EFAULT);
return data;
}
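The expected constants in the check above are simply the argument sums (plus one for bpf_fentry_test1), e.g.:

	bpf_fentry_test1(1)                                       = 1 + 1                       = 2
	bpf_fentry_test4((void *)7, 8, 9, 10)                     = 7 + 8 + 9 + 10              = 34
	bpf_fentry_test5(11, (void *)12, 13, 14, 15)              = 11 + 12 + 13 + 14 + 15      = 65
	bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21)  = 16 + 17 + 18 + 19 + 20 + 21 = 111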
diff --git a/net/core/filter.c b/net/core/filter.c
index fc303abec8fa..49ded4a7588a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3816,7 +3816,7 @@ static const struct bpf_func_proto bpf_skb_event_output_proto = {
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
-static u32 bpf_skb_output_btf_ids[5];
+static int bpf_skb_output_btf_ids[5];
const struct bpf_func_proto bpf_skb_output_proto = {
.func = bpf_skb_event_output,
.gpl_only = true,
@@ -8684,16 +8684,6 @@ out:
}
#ifdef CONFIG_INET
-struct sk_reuseport_kern {
- struct sk_buff *skb;
- struct sock *sk;
- struct sock *selected_sk;
- void *data_end;
- u32 hash;
- u32 reuseport_id;
- bool bind_inany;
-};
-
static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern,
struct sock_reuseport *reuse,
struct sock *sk, struct sk_buff *skb,
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index df6809a76404..4842a134b202 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -201,6 +201,8 @@ enum bpf_attach_type {
BPF_CGROUP_GETSOCKOPT,
BPF_CGROUP_SETSOCKOPT,
BPF_TRACE_RAW_TP,
+ BPF_TRACE_FENTRY,
+ BPF_TRACE_FEXIT,
__MAX_BPF_ATTACH_TYPE
};
@@ -423,6 +425,7 @@ union bpf_attr {
__aligned_u64 line_info; /* line info */
__u32 line_info_cnt; /* number of bpf_line_info records */
__u32 attach_btf_id; /* in-kernel BTF type id to attach to */
+ __u32 attach_prog_fd; /* 0 to attach to vmlinux */
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index b3e3e99a0f28..98596e15390f 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -228,10 +228,13 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
memset(&attr, 0, sizeof(attr));
attr.prog_type = load_attr->prog_type;
attr.expected_attach_type = load_attr->expected_attach_type;
- if (attr.prog_type == BPF_PROG_TYPE_TRACING)
+ if (attr.prog_type == BPF_PROG_TYPE_TRACING) {
attr.attach_btf_id = load_attr->attach_btf_id;
- else
+ attr.attach_prog_fd = load_attr->attach_prog_fd;
+ } else {
attr.prog_ifindex = load_attr->prog_ifindex;
+ attr.kern_version = load_attr->kern_version;
+ }
attr.insn_cnt = (__u32)load_attr->insns_cnt;
attr.insns = ptr_to_u64(load_attr->insns);
attr.license = ptr_to_u64(load_attr->license);
@@ -245,7 +248,6 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
attr.log_size = 0;
}
- attr.kern_version = load_attr->kern_version;
attr.prog_btf_fd = load_attr->prog_btf_fd;
attr.func_info_rec_size = load_attr->func_info_rec_size;
attr.func_info_cnt = load_attr->func_info_cnt;
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 1c53bc5b4b3c..3c791fa8e68e 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -77,7 +77,10 @@ struct bpf_load_program_attr {
const struct bpf_insn *insns;
size_t insns_cnt;
const char *license;
- __u32 kern_version;
+ union {
+ __u32 kern_version;
+ __u32 attach_prog_fd;
+ };
union {
__u32 prog_ifindex;
__u32 attach_btf_id;
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index 0c7d28292898..c63ab1add126 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -44,4 +44,17 @@ enum libbpf_pin_type {
LIBBPF_PIN_BY_NAME,
};
+/* The following types should be used by BPF_PROG_TYPE_TRACING programs to
+ * access kernel function arguments. The BPF trampoline and raw tracepoints
+ * typecast all arguments to 'unsigned long long'.
+ */
+typedef int __attribute__((aligned(8))) ks32;
+typedef char __attribute__((aligned(8))) ks8;
+typedef short __attribute__((aligned(8))) ks16;
+typedef long long __attribute__((aligned(8))) ks64;
+typedef unsigned int __attribute__((aligned(8))) ku32;
+typedef unsigned char __attribute__((aligned(8))) ku8;
+typedef unsigned short __attribute__((aligned(8))) ku16;
+typedef unsigned long long __attribute__((aligned(8))) ku64;
+
#endif
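A tracing program then describes the traced function's arguments with a struct built from these 8-byte aligned types, as the fentry_test.c selftest further below does. A minimal sketch (struct and function names here are illustrative):

	struct test2_args {
		ks32 a;		/* first argument, an 'int' widened to an 8-byte slot */
		ku64 b;		/* second argument, a 'u64' */
	};

	SEC("fentry/bpf_fentry_test2")
	int trace_test2(struct test2_args *ctx)
	{
		/* ctx->a and ctx->b hold the values bpf_fentry_test2() was called with */
		return 0;
	}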
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 86a1847e4a9f..88efa2bb7137 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -316,6 +316,28 @@ __s32 btf__find_by_name(const struct btf *btf, const char *type_name)
return -ENOENT;
}
+__s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
+ __u32 kind)
+{
+ __u32 i;
+
+ if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void"))
+ return 0;
+
+ for (i = 1; i <= btf->nr_types; i++) {
+ const struct btf_type *t = btf->types[i];
+ const char *name;
+
+ if (btf_kind(t) != kind)
+ continue;
+ name = btf__name_by_offset(btf, t->name_off);
+ if (name && !strcmp(type_name, name))
+ return i;
+ }
+
+ return -ENOENT;
+}
+
void btf__free(struct btf *btf)
{
if (!btf)
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index b18994116a44..d9ac73a02cde 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -72,6 +72,8 @@ LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf);
LIBBPF_API int btf__load(struct btf *btf);
LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
const char *type_name);
+LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf,
+ const char *type_name, __u32 kind);
LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf);
LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf,
__u32 id);
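Given a struct btf handle (vmlinux BTF, or a program's BTF obtained via btf__get_from_id()), resolving an attach target looks like this usage sketch:

	__s32 id = btf__find_by_name_kind(btf, "bpf_fentry_test1", BTF_KIND_FUNC);

	if (id <= 0)
		return -ESRCH;	/* not found; 0 is only returned for BTF_KIND_UNKN or "void" */
	/* a positive id can be passed as attach_btf_id when loading a tracing program */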
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 96ef18cfeffb..7132c6bdec02 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -189,6 +189,7 @@ struct bpf_program {
enum bpf_attach_type expected_attach_type;
__u32 attach_btf_id;
+ __u32 attach_prog_fd;
void *func_info;
__u32 func_info_rec_size;
__u32 func_info_cnt;
@@ -3683,8 +3684,13 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
load_attr.insns = insns;
load_attr.insns_cnt = insns_cnt;
load_attr.license = license;
- load_attr.kern_version = kern_version;
- load_attr.prog_ifindex = prog->prog_ifindex;
+ if (prog->type == BPF_PROG_TYPE_TRACING) {
+ load_attr.attach_prog_fd = prog->attach_prog_fd;
+ load_attr.attach_btf_id = prog->attach_btf_id;
+ } else {
+ load_attr.kern_version = kern_version;
+ load_attr.prog_ifindex = prog->prog_ifindex;
+ }
/* if .BTF.ext was loaded, kernel supports associated BTF for prog */
if (prog->obj->btf_ext)
btf_fd = bpf_object__btf_fd(prog->obj);
@@ -3699,7 +3705,6 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
load_attr.line_info_cnt = prog->line_info_cnt;
load_attr.log_level = prog->log_level;
load_attr.prog_flags = prog->prog_flags;
- load_attr.attach_btf_id = prog->attach_btf_id;
retry_load:
log_buf = malloc(log_buf_size);
@@ -3856,8 +3861,9 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level)
return 0;
}
-static int libbpf_attach_btf_id_by_name(const char *name, __u32 *btf_id);
-
+static int libbpf_find_attach_btf_id(const char *name,
+ enum bpf_attach_type attach_type,
+ __u32 attach_prog_fd);
static struct bpf_object *
__bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
struct bpf_object_open_opts *opts)
@@ -3868,6 +3874,7 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
const char *obj_name;
char tmp_name[64];
bool relaxed_maps;
+ __u32 attach_prog_fd;
int err;
if (elf_version(EV_CURRENT) == EV_NONE) {
@@ -3898,6 +3905,7 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
obj->relaxed_core_relocs = OPTS_GET(opts, relaxed_core_relocs, false);
relaxed_maps = OPTS_GET(opts, relaxed_maps, false);
pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
+ attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0);
CHECK_ERR(bpf_object__elf_init(obj), err, out);
CHECK_ERR(bpf_object__check_endianness(obj), err, out);
@@ -3910,7 +3918,6 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
bpf_object__for_each_program(prog, obj) {
enum bpf_prog_type prog_type;
enum bpf_attach_type attach_type;
- __u32 btf_id;
err = libbpf_prog_type_by_name(prog->section_name, &prog_type,
&attach_type);
@@ -3923,10 +3930,13 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
bpf_program__set_type(prog, prog_type);
bpf_program__set_expected_attach_type(prog, attach_type);
if (prog_type == BPF_PROG_TYPE_TRACING) {
- err = libbpf_attach_btf_id_by_name(prog->section_name, &btf_id);
- if (err)
+ err = libbpf_find_attach_btf_id(prog->section_name,
+ attach_type,
+ attach_prog_fd);
+ if (err <= 0)
goto out;
- prog->attach_btf_id = btf_id;
+ prog->attach_btf_id = err;
+ prog->attach_prog_fd = attach_prog_fd;
}
}
@@ -4935,6 +4945,10 @@ static const struct {
BPF_PROG_SEC("raw_tp/", BPF_PROG_TYPE_RAW_TRACEPOINT),
BPF_PROG_BTF("tp_btf/", BPF_PROG_TYPE_TRACING,
BPF_TRACE_RAW_TP),
+ BPF_PROG_BTF("fentry/", BPF_PROG_TYPE_TRACING,
+ BPF_TRACE_FENTRY),
+ BPF_PROG_BTF("fexit/", BPF_PROG_TYPE_TRACING,
+ BPF_TRACE_FEXIT),
BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP),
BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT),
BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN),
@@ -5052,43 +5066,94 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
}
#define BTF_PREFIX "btf_trace_"
-static int libbpf_attach_btf_id_by_name(const char *name, __u32 *btf_id)
+int libbpf_find_vmlinux_btf_id(const char *name,
+ enum bpf_attach_type attach_type)
{
struct btf *btf = bpf_core_find_kernel_btf();
- char raw_tp_btf_name[128] = BTF_PREFIX;
- char *dst = raw_tp_btf_name + sizeof(BTF_PREFIX) - 1;
- int ret, i, err = -EINVAL;
+ char raw_tp_btf[128] = BTF_PREFIX;
+ char *dst = raw_tp_btf + sizeof(BTF_PREFIX) - 1;
+ const char *btf_name;
+ int err = -EINVAL;
+ u32 kind;
if (IS_ERR(btf)) {
pr_warn("vmlinux BTF is not found\n");
return -EINVAL;
}
- if (!name)
+ if (attach_type == BPF_TRACE_RAW_TP) {
+ /* prepend "btf_trace_" prefix per kernel convention */
+ strncat(dst, name, sizeof(raw_tp_btf) - sizeof(BTF_PREFIX));
+ btf_name = raw_tp_btf;
+ kind = BTF_KIND_TYPEDEF;
+ } else {
+ btf_name = name;
+ kind = BTF_KIND_FUNC;
+ }
+ err = btf__find_by_name_kind(btf, btf_name, kind);
+ btf__free(btf);
+ return err;
+}
+
+static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
+{
+ struct bpf_prog_info_linear *info_linear;
+ struct bpf_prog_info *info;
+ struct btf *btf = NULL;
+ int err = -EINVAL;
+
+ info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0);
+ if (IS_ERR_OR_NULL(info_linear)) {
+ pr_warn("failed get_prog_info_linear for FD %d\n",
+ attach_prog_fd);
+ return -EINVAL;
+ }
+ info = &info_linear->info;
+ if (!info->btf_id) {
+ pr_warn("The target program doesn't have BTF\n");
+ goto out;
+ }
+ if (btf__get_from_id(info->btf_id, &btf)) {
+ pr_warn("Failed to get BTF of the program\n");
goto out;
+ }
+ err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
+ btf__free(btf);
+ if (err <= 0) {
+ pr_warn("%s is not found in prog's BTF\n", name);
+ goto out;
+ }
+out:
+ free(info_linear);
+ return err;
+}
+
+static int libbpf_find_attach_btf_id(const char *name,
+ enum bpf_attach_type attach_type,
+ __u32 attach_prog_fd)
+{
+ int i, err;
+
+ if (!name)
+ return -EINVAL;
for (i = 0; i < ARRAY_SIZE(section_names); i++) {
if (!section_names[i].is_attach_btf)
continue;
if (strncmp(name, section_names[i].sec, section_names[i].len))
continue;
- /* prepend "btf_trace_" prefix per kernel convention */
- strncat(dst, name + section_names[i].len,
- sizeof(raw_tp_btf_name) - sizeof(BTF_PREFIX));
- ret = btf__find_by_name(btf, raw_tp_btf_name);
- if (ret <= 0) {
- pr_warn("%s is not found in vmlinux BTF\n", dst);
- goto out;
- }
- *btf_id = ret;
- err = 0;
- goto out;
+ if (attach_prog_fd)
+ err = libbpf_find_prog_btf_id(name + section_names[i].len,
+ attach_prog_fd);
+ else
+ err = libbpf_find_vmlinux_btf_id(name + section_names[i].len,
+ attach_type);
+ if (err <= 0)
+ pr_warn("%s is not found in vmlinux BTF\n", name);
+ return err;
}
pr_warn("failed to identify btf_id based on ELF section name '%s'\n", name);
- err = -ESRCH;
-out:
- btf__free(btf);
- return err;
+ return -ESRCH;
}
int libbpf_attach_type_by_name(const char *name,
@@ -5716,6 +5781,37 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
return (struct bpf_link *)link;
}
+struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
+{
+ char errmsg[STRERR_BUFSIZE];
+ struct bpf_link_fd *link;
+ int prog_fd, pfd;
+
+ prog_fd = bpf_program__fd(prog);
+ if (prog_fd < 0) {
+ pr_warn("program '%s': can't attach before loaded\n",
+ bpf_program__title(prog, false));
+ return ERR_PTR(-EINVAL);
+ }
+
+ link = malloc(sizeof(*link));
+ if (!link)
+ return ERR_PTR(-ENOMEM);
+ link->link.destroy = &bpf_link__destroy_fd;
+
+ pfd = bpf_raw_tracepoint_open(NULL, prog_fd);
+ if (pfd < 0) {
+ pfd = -errno;
+ free(link);
+ pr_warn("program '%s': failed to attach to trace: %s\n",
+ bpf_program__title(prog, false),
+ libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+ return ERR_PTR(pfd);
+ }
+ link->fd = pfd;
+ return (struct bpf_link *)link;
+}
+
enum bpf_perf_event_ret
bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
void **copy_mem, size_t *copy_size,
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 5aa27caad6c2..0dbf4bfba0c4 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -108,8 +108,9 @@ struct bpf_object_open_opts {
* auto-pinned to that path on load; defaults to "/sys/fs/bpf".
*/
const char *pin_root_path;
+ __u32 attach_prog_fd;
};
-#define bpf_object_open_opts__last_field pin_root_path
+#define bpf_object_open_opts__last_field attach_prog_fd
LIBBPF_API struct bpf_object *bpf_object__open(const char *path);
LIBBPF_API struct bpf_object *
@@ -188,6 +189,8 @@ libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
enum bpf_attach_type *expected_attach_type);
LIBBPF_API int libbpf_attach_type_by_name(const char *name,
enum bpf_attach_type *attach_type);
+LIBBPF_API int libbpf_find_vmlinux_btf_id(const char *name,
+ enum bpf_attach_type attach_type);
/* Accessors of bpf_program */
struct bpf_program;
@@ -251,6 +254,8 @@ LIBBPF_API struct bpf_link *
bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
const char *tp_name);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_trace(struct bpf_program *prog);
struct bpf_insn;
/*
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 9f39ee06b2d4..8ddc2c40e482 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -199,9 +199,12 @@ LIBBPF_0.0.6 {
bpf_map__set_pin_path;
bpf_object__open_file;
bpf_object__open_mem;
+ bpf_program__attach_trace;
bpf_program__get_expected_attach_type;
bpf_program__get_type;
bpf_program__is_tracing;
bpf_program__set_tracing;
bpf_program__size;
+ btf__find_by_name_kind;
+ libbpf_find_vmlinux_btf_id;
} LIBBPF_0.0.5;
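Putting the user-space pieces together, attaching an fexit program to another, already loaded BPF program follows the pattern of the fexit_bpf2bpf selftest below; a sketch with error handling omitted, where target_fd and the object/section names are placeholders:

	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
		.attach_prog_fd = target_fd,	/* FD of the already loaded target program */
	);
	struct bpf_object *obj = bpf_object__open_file("fexit_prog.o", &opts);
	struct bpf_program *prog;
	struct bpf_link *link;

	bpf_object__load(obj);
	prog = bpf_object__find_program_by_title(obj, "fexit/test_pkt_access");
	link = bpf_program__attach_trace(prog);	/* opens a raw tracepoint FD on the trampoline */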
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
new file mode 100644
index 000000000000..40bcff2cc274
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+#include <test_progs.h>
+
+void test_fentry_fexit(void)
+{
+ struct bpf_prog_load_attr attr_fentry = {
+ .file = "./fentry_test.o",
+ };
+ struct bpf_prog_load_attr attr_fexit = {
+ .file = "./fexit_test.o",
+ };
+
+ struct bpf_object *obj_fentry = NULL, *obj_fexit = NULL, *pkt_obj;
+ struct bpf_map *data_map_fentry, *data_map_fexit;
+ char fentry_name[] = "fentry/bpf_fentry_testX";
+ char fexit_name[] = "fexit/bpf_fentry_testX";
+ int err, pkt_fd, kfree_skb_fd, i;
+ struct bpf_link *link[12] = {};
+ struct bpf_program *prog[12];
+ __u32 duration, retval;
+ const int zero = 0;
+ u64 result[12];
+
+ err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS,
+ &pkt_obj, &pkt_fd);
+ if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno))
+ return;
+ err = bpf_prog_load_xattr(&attr_fentry, &obj_fentry, &kfree_skb_fd);
+ if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno))
+ goto close_prog;
+ err = bpf_prog_load_xattr(&attr_fexit, &obj_fexit, &kfree_skb_fd);
+ if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno))
+ goto close_prog;
+
+ for (i = 0; i < 6; i++) {
+ fentry_name[sizeof(fentry_name) - 2] = '1' + i;
+ prog[i] = bpf_object__find_program_by_title(obj_fentry, fentry_name);
+ if (CHECK(!prog[i], "find_prog", "prog %s not found\n", fentry_name))
+ goto close_prog;
+ link[i] = bpf_program__attach_trace(prog[i]);
+ if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n"))
+ goto close_prog;
+ }
+ data_map_fentry = bpf_object__find_map_by_name(obj_fentry, "fentry_t.bss");
+ if (CHECK(!data_map_fentry, "find_data_map", "data map not found\n"))
+ goto close_prog;
+
+ for (i = 6; i < 12; i++) {
+ fexit_name[sizeof(fexit_name) - 2] = '1' + i - 6;
+ prog[i] = bpf_object__find_program_by_title(obj_fexit, fexit_name);
+ if (CHECK(!prog[i], "find_prog", "prog %s not found\n", fexit_name))
+ goto close_prog;
+ link[i] = bpf_program__attach_trace(prog[i]);
+ if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n"))
+ goto close_prog;
+ }
+ data_map_fexit = bpf_object__find_map_by_name(obj_fexit, "fexit_te.bss");
+ if (CHECK(!data_map_fexit, "find_data_map", "data map not found\n"))
+ goto close_prog;
+
+ err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
+ NULL, NULL, &retval, &duration);
+ CHECK(err || retval, "ipv6",
+ "err %d errno %d retval %d duration %d\n",
+ err, errno, retval, duration);
+
+ err = bpf_map_lookup_elem(bpf_map__fd(data_map_fentry), &zero, &result);
+ if (CHECK(err, "get_result",
+ "failed to get output data: %d\n", err))
+ goto close_prog;
+
+ err = bpf_map_lookup_elem(bpf_map__fd(data_map_fexit), &zero, result + 6);
+ if (CHECK(err, "get_result",
+ "failed to get output data: %d\n", err))
+ goto close_prog;
+
+ for (i = 0; i < 12; i++)
+ if (CHECK(result[i] != 1, "result", "bpf_fentry_test%d failed err %ld\n",
+ i % 6 + 1, result[i]))
+ goto close_prog;
+
+close_prog:
+ for (i = 0; i < 12; i++)
+ if (!IS_ERR_OR_NULL(link[i]))
+ bpf_link__destroy(link[i]);
+ bpf_object__close(obj_fentry);
+ bpf_object__close(obj_fexit);
+ bpf_object__close(pkt_obj);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
new file mode 100644
index 000000000000..9fb103193878
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+#include <test_progs.h>
+
+void test_fentry_test(void)
+{
+ struct bpf_prog_load_attr attr = {
+ .file = "./fentry_test.o",
+ };
+
+ char prog_name[] = "fentry/bpf_fentry_testX";
+ struct bpf_object *obj = NULL, *pkt_obj;
+ int err, pkt_fd, kfree_skb_fd, i;
+ struct bpf_link *link[6] = {};
+ struct bpf_program *prog[6];
+ __u32 duration, retval;
+ struct bpf_map *data_map;
+ const int zero = 0;
+ u64 result[6];
+
+ err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS,
+ &pkt_obj, &pkt_fd);
+ if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno))
+ return;
+ err = bpf_prog_load_xattr(&attr, &obj, &kfree_skb_fd);
+ if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno))
+ goto close_prog;
+
+ for (i = 0; i < 6; i++) {
+ prog_name[sizeof(prog_name) - 2] = '1' + i;
+ prog[i] = bpf_object__find_program_by_title(obj, prog_name);
+ if (CHECK(!prog[i], "find_prog", "prog %s not found\n", prog_name))
+ goto close_prog;
+ link[i] = bpf_program__attach_trace(prog[i]);
+ if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n"))
+ goto close_prog;
+ }
+ data_map = bpf_object__find_map_by_name(obj, "fentry_t.bss");
+ if (CHECK(!data_map, "find_data_map", "data map not found\n"))
+ goto close_prog;
+
+ err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
+ NULL, NULL, &retval, &duration);
+ CHECK(err || retval, "ipv6",
+ "err %d errno %d retval %d duration %d\n",
+ err, errno, retval, duration);
+
+ err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, &result);
+ if (CHECK(err, "get_result",
+ "failed to get output data: %d\n", err))
+ goto close_prog;
+
+ for (i = 0; i < 6; i++)
+ if (CHECK(result[i] != 1, "result", "bpf_fentry_test%d failed err %ld\n",
+ i + 1, result[i]))
+ goto close_prog;
+
+close_prog:
+ for (i = 0; i < 6; i++)
+ if (!IS_ERR_OR_NULL(link[i]))
+ bpf_link__destroy(link[i]);
+ bpf_object__close(obj);
+ bpf_object__close(pkt_obj);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
new file mode 100644
index 000000000000..15c7378362dd
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+#include <test_progs.h>
+
+#define PROG_CNT 3
+
+void test_fexit_bpf2bpf(void)
+{
+ const char *prog_name[PROG_CNT] = {
+ "fexit/test_pkt_access",
+ "fexit/test_pkt_access_subprog1",
+ "fexit/test_pkt_access_subprog2",
+ };
+ struct bpf_object *obj = NULL, *pkt_obj;
+ int err, pkt_fd, i;
+ struct bpf_link *link[PROG_CNT] = {};
+ struct bpf_program *prog[PROG_CNT];
+ __u32 duration, retval;
+ struct bpf_map *data_map;
+ const int zero = 0;
+ u64 result[PROG_CNT];
+
+ err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_UNSPEC,
+ &pkt_obj, &pkt_fd);
+ if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno))
+ return;
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
+ .attach_prog_fd = pkt_fd,
+ );
+
+ obj = bpf_object__open_file("./fexit_bpf2bpf.o", &opts);
+ if (CHECK(IS_ERR_OR_NULL(obj), "obj_open",
+ "failed to open fexit_bpf2bpf: %ld\n",
+ PTR_ERR(obj)))
+ goto close_prog;
+
+ err = bpf_object__load(obj);
+ if (CHECK(err, "obj_load", "err %d\n", err))
+ goto close_prog;
+
+ for (i = 0; i < PROG_CNT; i++) {
+ prog[i] = bpf_object__find_program_by_title(obj, prog_name[i]);
+ if (CHECK(!prog[i], "find_prog", "prog %s not found\n", prog_name[i]))
+ goto close_prog;
+ link[i] = bpf_program__attach_trace(prog[i]);
+ if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n"))
+ goto close_prog;
+ }
+ data_map = bpf_object__find_map_by_name(obj, "fexit_bp.bss");
+ if (CHECK(!data_map, "find_data_map", "data map not found\n"))
+ goto close_prog;
+
+ err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
+ NULL, NULL, &retval, &duration);
+ CHECK(err || retval, "ipv6",
+ "err %d errno %d retval %d duration %d\n",
+ err, errno, retval, duration);
+
+ err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, &result);
+ if (CHECK(err, "get_result",
+ "failed to get output data: %d\n", err))
+ goto close_prog;
+
+ for (i = 0; i < PROG_CNT; i++)
+ if (CHECK(result[i] != 1, "result", "fexit_bpf2bpf failed err %ld\n",
+ result[i]))
+ goto close_prog;
+
+close_prog:
+ for (i = 0; i < PROG_CNT; i++)
+ if (!IS_ERR_OR_NULL(link[i]))
+ bpf_link__destroy(link[i]);
+ if (!IS_ERR_OR_NULL(obj))
+ bpf_object__close(obj);
+ bpf_object__close(pkt_obj);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
new file mode 100644
index 000000000000..3b9dbf7433f0
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+#include <test_progs.h>
+
+/* x86-64 fits 55 JITed and 43 interpreted progs into half a page */
+#define CNT 40
+
+void test_fexit_stress(void)
+{
+ char test_skb[128] = {};
+ int fexit_fd[CNT] = {};
+ int link_fd[CNT] = {};
+ __u32 duration = 0;
+ char error[4096];
+ __u32 prog_ret;
+ int err, i, filter_fd;
+
+ const struct bpf_insn trace_program[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+
+ struct bpf_load_program_attr load_attr = {
+ .prog_type = BPF_PROG_TYPE_TRACING,
+ .license = "GPL",
+ .insns = trace_program,
+ .insns_cnt = sizeof(trace_program) / sizeof(struct bpf_insn),
+ .expected_attach_type = BPF_TRACE_FEXIT,
+ };
+
+ const struct bpf_insn skb_program[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+
+ struct bpf_load_program_attr skb_load_attr = {
+ .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
+ .license = "GPL",
+ .insns = skb_program,
+ .insns_cnt = sizeof(skb_program) / sizeof(struct bpf_insn),
+ };
+
+ err = libbpf_find_vmlinux_btf_id("bpf_fentry_test1",
+ load_attr.expected_attach_type);
+ if (CHECK(err <= 0, "find_vmlinux_btf_id", "failed: %d\n", err))
+ goto out;
+ load_attr.attach_btf_id = err;
+
+ for (i = 0; i < CNT; i++) {
+ fexit_fd[i] = bpf_load_program_xattr(&load_attr, error, sizeof(error));
+ if (CHECK(fexit_fd[i] < 0, "fexit loaded",
+ "failed: %d errno %d\n", fexit_fd[i], errno))
+ goto out;
+ link_fd[i] = bpf_raw_tracepoint_open(NULL, fexit_fd[i]);
+ if (CHECK(link_fd[i] < 0, "fexit attach failed",
+ "prog %d failed: %d err %d\n", i, link_fd[i], errno))
+ goto out;
+ }
+
+ filter_fd = bpf_load_program_xattr(&skb_load_attr, error, sizeof(error));
+ if (CHECK(filter_fd < 0, "test_program_loaded", "failed: %d errno %d\n",
+ filter_fd, errno))
+ goto out;
+
+ err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0,
+ 0, &prog_ret, 0);
+ close(filter_fd);
+ CHECK_FAIL(err);
+out:
+ for (i = 0; i < CNT; i++) {
+ if (link_fd[i])
+ close(link_fd[i]);
+ if (fexit_fd[i])
+ close(fexit_fd[i]);
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
new file mode 100644
index 000000000000..f99013222c74
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+#include <test_progs.h>
+
+void test_fexit_test(void)
+{
+ struct bpf_prog_load_attr attr = {
+ .file = "./fexit_test.o",
+ };
+
+ char prog_name[] = "fexit/bpf_fentry_testX";
+ struct bpf_object *obj = NULL, *pkt_obj;
+ int err, pkt_fd, kfree_skb_fd, i;
+ struct bpf_link *link[6] = {};
+ struct bpf_program *prog[6];
+ __u32 duration, retval;
+ struct bpf_map *data_map;
+ const int zero = 0;
+ u64 result[6];
+
+ err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS,
+ &pkt_obj, &pkt_fd);
+ if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno))
+ return;
+ err = bpf_prog_load_xattr(&attr, &obj, &kfree_skb_fd);
+ if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno))
+ goto close_prog;
+
+ for (i = 0; i < 6; i++) {
+ prog_name[sizeof(prog_name) - 2] = '1' + i;
+ prog[i] = bpf_object__find_program_by_title(obj, prog_name);
+ if (CHECK(!prog[i], "find_prog", "prog %s not found\n", prog_name))
+ goto close_prog;
+ link[i] = bpf_program__attach_trace(prog[i]);
+ if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n"))
+ goto close_prog;
+ }
+ data_map = bpf_object__find_map_by_name(obj, "fexit_te.bss");
+ if (CHECK(!data_map, "find_data_map", "data map not found\n"))
+ goto close_prog;
+
+ err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
+ NULL, NULL, &retval, &duration);
+ CHECK(err || retval, "ipv6",
+ "err %d errno %d retval %d duration %d\n",
+ err, errno, retval, duration);
+
+ err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, &result);
+ if (CHECK(err, "get_result",
+ "failed to get output data: %d\n", err))
+ goto close_prog;
+
+ for (i = 0; i < 6; i++)
+ if (CHECK(result[i] != 1, "result", "bpf_fentry_test%d failed err %ld\n",
+ i + 1, result[i]))
+ goto close_prog;
+
+close_prog:
+ for (i = 0; i < 6; i++)
+ if (!IS_ERR_OR_NULL(link[i]))
+ bpf_link__destroy(link[i]);
+ bpf_object__close(obj);
+ bpf_object__close(pkt_obj);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
index 55d36856e621..7507c8f689bc 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
@@ -60,15 +60,17 @@ void test_kfree_skb(void)
.file = "./kfree_skb.o",
};
+ struct bpf_link *link = NULL, *link_fentry = NULL, *link_fexit = NULL;
+ struct bpf_map *perf_buf_map, *global_data;
+ struct bpf_program *prog, *fentry, *fexit;
struct bpf_object *obj, *obj2 = NULL;
struct perf_buffer_opts pb_opts = {};
struct perf_buffer *pb = NULL;
- struct bpf_link *link = NULL;
- struct bpf_map *perf_buf_map;
- struct bpf_program *prog;
int err, kfree_skb_fd;
bool passed = false;
__u32 duration = 0;
+ const int zero = 0;
+ bool test_ok[2];
err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS,
&obj, &tattr.prog_fd);
@@ -82,9 +84,28 @@ void test_kfree_skb(void)
prog = bpf_object__find_program_by_title(obj2, "tp_btf/kfree_skb");
if (CHECK(!prog, "find_prog", "prog kfree_skb not found\n"))
goto close_prog;
+ fentry = bpf_object__find_program_by_title(obj2, "fentry/eth_type_trans");
+ if (CHECK(!fentry, "find_prog", "prog eth_type_trans not found\n"))
+ goto close_prog;
+ fexit = bpf_object__find_program_by_title(obj2, "fexit/eth_type_trans");
+ if (CHECK(!fexit, "find_prog", "prog eth_type_trans not found\n"))
+ goto close_prog;
+
+ global_data = bpf_object__find_map_by_name(obj2, "kfree_sk.bss");
+ if (CHECK(!global_data, "find global data", "not found\n"))
+ goto close_prog;
+
link = bpf_program__attach_raw_tracepoint(prog, NULL);
if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
goto close_prog;
+ link_fentry = bpf_program__attach_trace(fentry);
+ if (CHECK(IS_ERR(link_fentry), "attach fentry", "err %ld\n",
+ PTR_ERR(link_fentry)))
+ goto close_prog;
+ link_fexit = bpf_program__attach_trace(fexit);
+ if (CHECK(IS_ERR(link_fexit), "attach fexit", "err %ld\n",
+ PTR_ERR(link_fexit)))
+ goto close_prog;
perf_buf_map = bpf_object__find_map_by_name(obj2, "perf_buf_map");
if (CHECK(!perf_buf_map, "find_perf_buf_map", "not found\n"))
@@ -108,14 +129,26 @@ void test_kfree_skb(void)
err = perf_buffer__poll(pb, 100);
if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
goto close_prog;
+
/* make sure kfree_skb program was triggered
* and it sent expected skb into ring buffer
*/
CHECK_FAIL(!passed);
+
+ err = bpf_map_lookup_elem(bpf_map__fd(global_data), &zero, test_ok);
+ if (CHECK(err, "get_result",
+ "failed to get output data: %d\n", err))
+ goto close_prog;
+
+ CHECK_FAIL(!test_ok[0] || !test_ok[1]);
close_prog:
perf_buffer__free(pb);
if (!IS_ERR_OR_NULL(link))
bpf_link__destroy(link);
+ if (!IS_ERR_OR_NULL(link_fentry))
+ bpf_link__destroy(link_fentry);
+ if (!IS_ERR_OR_NULL(link_fexit))
+ bpf_link__destroy(link_fexit);
bpf_object__close(obj);
bpf_object__close(obj2);
}
diff --git a/tools/testing/selftests/bpf/progs/fentry_test.c b/tools/testing/selftests/bpf/progs/fentry_test.c
new file mode 100644
index 000000000000..545788bf8d50
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fentry_test.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct test1 {
+ ks32 a;
+};
+static volatile __u64 test1_result;
+SEC("fentry/bpf_fentry_test1")
+int test1(struct test1 *ctx)
+{
+ test1_result = ctx->a == 1;
+ return 0;
+}
+
+struct test2 {
+ ks32 a;
+ ku64 b;
+};
+static volatile __u64 test2_result;
+SEC("fentry/bpf_fentry_test2")
+int test2(struct test2 *ctx)
+{
+ test2_result = ctx->a == 2 && ctx->b == 3;
+ return 0;
+}
+
+struct test3 {
+ ks8 a;
+ ks32 b;
+ ku64 c;
+};
+static volatile __u64 test3_result;
+SEC("fentry/bpf_fentry_test3")
+int test3(struct test3 *ctx)
+{
+ test3_result = ctx->a == 4 && ctx->b == 5 && ctx->c == 6;
+ return 0;
+}
+
+struct test4 {
+ void *a;
+ ks8 b;
+ ks32 c;
+ ku64 d;
+};
+static volatile __u64 test4_result;
+SEC("fentry/bpf_fentry_test4")
+int test4(struct test4 *ctx)
+{
+ test4_result = ctx->a == (void *)7 && ctx->b == 8 && ctx->c == 9 &&
+ ctx->d == 10;
+ return 0;
+}
+
+struct test5 {
+ ku64 a;
+ void *b;
+ ks16 c;
+ ks32 d;
+ ku64 e;
+};
+static volatile __u64 test5_result;
+SEC("fentry/bpf_fentry_test5")
+int test5(struct test5 *ctx)
+{
+ test5_result = ctx->a == 11 && ctx->b == (void *)12 && ctx->c == 13 &&
+ ctx->d == 14 && ctx->e == 15;
+ return 0;
+}
+
+struct test6 {
+ ku64 a;
+ void *b;
+ ks16 c;
+ ks32 d;
+ void *e;
+ ks64 f;
+};
+static volatile __u64 test6_result;
+SEC("fentry/bpf_fentry_test6")
+int test6(struct test6 *ctx)
+{
+ test6_result = ctx->a == 16 && ctx->b == (void *)17 && ctx->c == 18 &&
+ ctx->d == 19 && ctx->e == (void *)20 && ctx->f == 21;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
new file mode 100644
index 000000000000..981f0474da5a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct sk_buff {
+ unsigned int len;
+};
+
+struct args {
+ struct sk_buff *skb;
+ ks32 ret;
+};
+static volatile __u64 test_result;
+SEC("fexit/test_pkt_access")
+int test_main(struct args *ctx)
+{
+ struct sk_buff *skb = ctx->skb;
+ int len;
+
+ __builtin_preserve_access_index(({
+ len = skb->len;
+ }));
+ if (len != 74 || ctx->ret != 0)
+ return 0;
+ test_result = 1;
+ return 0;
+}
+
+struct args_subprog1 {
+ struct sk_buff *skb;
+ ks32 ret;
+};
+static volatile __u64 test_result_subprog1;
+SEC("fexit/test_pkt_access_subprog1")
+int test_subprog1(struct args_subprog1 *ctx)
+{
+ struct sk_buff *skb = ctx->skb;
+ int len;
+
+ __builtin_preserve_access_index(({
+ len = skb->len;
+ }));
+ if (len != 74 || ctx->ret != 148)
+ return 0;
+ test_result_subprog1 = 1;
+ return 0;
+}
+
+/* Though test_pkt_access_subprog2() is defined in C as:
+ * static __attribute__ ((noinline))
+ * int test_pkt_access_subprog2(int val, volatile struct __sk_buff *skb)
+ * {
+ * return skb->len * val;
+ * }
+ * llvm optimizations remove 'int val' argument and generate BPF assembly:
+ * r0 = *(u32 *)(r1 + 0)
+ * w0 <<= 1
+ * exit
+ * In such a case the verifier falls back to the conservative mode and the
+ * tracing program can access the arguments and the return value as u64 values
+ * instead of the accurate types.
+ */
+struct args_subprog2 {
+ ku64 args[5];
+ ku64 ret;
+};
+static volatile __u64 test_result_subprog2;
+SEC("fexit/test_pkt_access_subprog2")
+int test_subprog2(struct args_subprog2 *ctx)
+{
+ struct sk_buff *skb = (void *)ctx->args[0];
+ __u64 ret;
+ int len;
+
+ bpf_probe_read_kernel(&len, sizeof(len),
+ __builtin_preserve_access_index(&skb->len));
+
+ ret = ctx->ret;
+ /* bpf_prog_load() loads "test_pkt_access.o" with BPF_F_TEST_RND_HI32
+ * which randomizes upper 32 bits after BPF_ALU32 insns.
+ * Hence after 'w0 <<= 1' upper bits of $rax are random.
+ * That is expected and correct. Trim them.
+ */
+ ret = (__u32) ret;
+ if (len != 74 || ret != 148)
+ return 0;
+ test_result_subprog2 = 1;
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/fexit_test.c b/tools/testing/selftests/bpf/progs/fexit_test.c
new file mode 100644
index 000000000000..8b98b1a51784
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fexit_test.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct test1 {
+ ks32 a;
+ ks32 ret;
+};
+static volatile __u64 test1_result;
+SEC("fexit/bpf_fentry_test1")
+int test1(struct test1 *ctx)
+{
+ test1_result = ctx->a == 1 && ctx->ret == 2;
+ return 0;
+}
+
+struct test2 {
+ ks32 a;
+ ku64 b;
+ ks32 ret;
+};
+static volatile __u64 test2_result;
+SEC("fexit/bpf_fentry_test2")
+int test2(struct test2 *ctx)
+{
+ test2_result = ctx->a == 2 && ctx->b == 3 && ctx->ret == 5;
+ return 0;
+}
+
+struct test3 {
+ ks8 a;
+ ks32 b;
+ ku64 c;
+ ks32 ret;
+};
+static volatile __u64 test3_result;
+SEC("fexit/bpf_fentry_test3")
+int test3(struct test3 *ctx)
+{
+ test3_result = ctx->a == 4 && ctx->b == 5 && ctx->c == 6 &&
+ ctx->ret == 15;
+ return 0;
+}
+
+struct test4 {
+ void *a;
+ ks8 b;
+ ks32 c;
+ ku64 d;
+ ks32 ret;
+};
+static volatile __u64 test4_result;
+SEC("fexit/bpf_fentry_test4")
+int test4(struct test4 *ctx)
+{
+ test4_result = ctx->a == (void *)7 && ctx->b == 8 && ctx->c == 9 &&
+ ctx->d == 10 && ctx->ret == 34;
+ return 0;
+}
+
+struct test5 {
+ ku64 a;
+ void *b;
+ ks16 c;
+ ks32 d;
+ ku64 e;
+ ks32 ret;
+};
+static volatile __u64 test5_result;
+SEC("fexit/bpf_fentry_test5")
+int test5(struct test5 *ctx)
+{
+ test5_result = ctx->a == 11 && ctx->b == (void *)12 && ctx->c == 13 &&
+ ctx->d == 14 && ctx->e == 15 && ctx->ret == 65;
+ return 0;
+}
+
+struct test6 {
+ ku64 a;
+ void *b;
+ ks16 c;
+ ks32 d;
+ void *e;
+ ks64 f;
+ ks32 ret;
+};
+static volatile __u64 test6_result;
+SEC("fexit/bpf_fentry_test6")
+int test6(struct test6 *ctx)
+{
+ test6_result = ctx->a == 16 && ctx->b == (void *)17 && ctx->c == 18 &&
+ ctx->d == 19 && ctx->e == (void *)20 && ctx->f == 21 &&
+ ctx->ret == 111;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/kfree_skb.c b/tools/testing/selftests/bpf/progs/kfree_skb.c
index f769fdbf6725..dcc9feac8338 100644
--- a/tools/testing/selftests/bpf/progs/kfree_skb.c
+++ b/tools/testing/selftests/bpf/progs/kfree_skb.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
#include <linux/bpf.h>
+#include <stdbool.h>
#include "bpf_helpers.h"
#include "bpf_endian.h"
@@ -116,3 +117,54 @@ int trace_kfree_skb(struct trace_kfree_skb *ctx)
&meta, sizeof(meta));
return 0;
}
+
+static volatile struct {
+ bool fentry_test_ok;
+ bool fexit_test_ok;
+} result;
+
+struct eth_type_trans_args {
+ struct sk_buff *skb;
+ struct net_device *dev;
+ unsigned short protocol; /* return value available to fexit progs */
+};
+
+SEC("fentry/eth_type_trans")
+int fentry_eth_type_trans(struct eth_type_trans_args *ctx)
+{
+ struct sk_buff *skb = ctx->skb;
+ struct net_device *dev = ctx->dev;
+ int len, ifindex;
+
+ __builtin_preserve_access_index(({
+ len = skb->len;
+ ifindex = dev->ifindex;
+ }));
+
+ /* fentry sees full packet including L2 header */
+ if (len != 74 || ifindex != 1)
+ return 0;
+ result.fentry_test_ok = true;
+ return 0;
+}
+
+SEC("fexit/eth_type_trans")
+int fexit_eth_type_trans(struct eth_type_trans_args *ctx)
+{
+ struct sk_buff *skb = ctx->skb;
+ struct net_device *dev = ctx->dev;
+ int len, ifindex;
+
+ __builtin_preserve_access_index(({
+ len = skb->len;
+ ifindex = dev->ifindex;
+ }));
+
+	/* fexit sees the packet without the L2 header, which eth_type_trans
+	 * should have consumed.
+	 */
+ if (len != 60 || ctx->protocol != bpf_htons(0x86dd) || ifindex != 1)
+ return 0;
+ result.fexit_test_ok = true;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_pkt_access.c b/tools/testing/selftests/bpf/progs/test_pkt_access.c
index 7cf42d14103f..3a7b4b607ed3 100644
--- a/tools/testing/selftests/bpf/progs/test_pkt_access.c
+++ b/tools/testing/selftests/bpf/progs/test_pkt_access.c
@@ -17,8 +17,38 @@
#define barrier() __asm__ __volatile__("": : :"memory")
int _version SEC("version") = 1;
-SEC("test1")
-int process(struct __sk_buff *skb)
+/* llvm will optimize both subprograms into exactly the same BPF assembly
+ *
+ * Disassembly of section .text:
+ *
+ * 0000000000000000 test_pkt_access_subprog1:
+ * ; return skb->len * 2;
+ * 0: 61 10 00 00 00 00 00 00 r0 = *(u32 *)(r1 + 0)
+ * 1: 64 00 00 00 01 00 00 00 w0 <<= 1
+ * 2: 95 00 00 00 00 00 00 00 exit
+ *
+ * 0000000000000018 test_pkt_access_subprog2:
+ * ; return skb->len * val;
+ * 3: 61 10 00 00 00 00 00 00 r0 = *(u32 *)(r1 + 0)
+ * 4: 64 00 00 00 01 00 00 00 w0 <<= 1
+ * 5: 95 00 00 00 00 00 00 00 exit
+ *
+ * Which makes it an interesting test for BTF-enabled verifier.
+ */
+static __attribute__ ((noinline))
+int test_pkt_access_subprog1(volatile struct __sk_buff *skb)
+{
+ return skb->len * 2;
+}
+
+static __attribute__ ((noinline))
+int test_pkt_access_subprog2(int val, volatile struct __sk_buff *skb)
+{
+ return skb->len * val;
+}
+
+SEC("classifier/test_pkt_access")
+int test_pkt_access(struct __sk_buff *skb)
{
void *data_end = (void *)(long)skb->data_end;
void *data = (void *)(long)skb->data;
@@ -48,6 +78,10 @@ int process(struct __sk_buff *skb)
tcp = (struct tcphdr *)((void *)(ip6h) + ihl_len);
}
+ if (test_pkt_access_subprog1(skb) != skb->len * 2)
+ return TC_ACT_SHOT;
+ if (test_pkt_access_subprog2(2, skb) != skb->len * 2)
+ return TC_ACT_SHOT;
if (tcp) {
if (((void *)(tcp) + 20) > data_end || proto != 6)
return TC_ACT_SHOT;