summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@kernel.org>2020-10-02 15:05:37 -0700
committerAlexei Starovoitov <ast@kernel.org>2020-10-02 15:05:37 -0700
commit60a128b532d88ac78d8dd60577700dea70ee8c38 (patch)
treee3d0e30829fc1afe77a5a0305216e6ef2b93a381 /kernel
parent440c5752a3cad6ec303613e0446adde790dc39be (diff)
parent00dc73e44a846fc5310df0e1415a90af76cc135e (diff)
Merge branch 'bpf: BTF support for ksyms'
Hao Luo says: ==================== v3 -> v4: - Rebasing - Cast bpf_[per|this]_cpu_ptr's parameter to void __percpu * before passing into per_cpu_ptr. v2 -> v3: - Rename functions and variables in verifier for better readability. - Stick to logging message convention in libbpf. - Move bpf_per_cpu_ptr and bpf_this_cpu_ptr from trace-specific helper set to base helper set. - More specific test in ksyms_btf. - Fix return type cast in bpf_*_cpu_ptr. - Fix btf leak in ksyms_btf selftest. - Fix return error code for kallsyms_find(). v1 -> v2: - Move check_pseudo_btf_id from check_ld_imm() to replace_map_fd_with_map_ptr() and rename the latter. - Add bpf_this_cpu_ptr(). - Use bpf_core_types_are_compat() in libbpf.c for checking type compatibility. - Rewrite typed ksym extern type in BTF with int to save space. - Minor revision of bpf_per_cpu_ptr()'s comments. - Avoid using long in tests that use skeleton. - Refactored test_ksyms.c by moving kallsyms_find() to trace_helpers.c - Fold the patches that sync include/linux/uapi and tools/include/linux/uapi. rfc -> v1: - Encode VAR's btf_id for PSEUDO_BTF_ID. - More checks in verifier. Checking the btf_id passed as PSEUDO_BTF_ID is valid VAR, its name and type. - Checks in libbpf on type compatibility of ksyms. - Add bpf_per_cpu_ptr() to access kernel percpu vars. Introduced new ARG and RET types for this helper. This patch series extends the previously added __ksym externs with btf support. Right now the __ksym externs are treated as pure 64-bit scalar value. Libbpf replaces ld_imm64 insn of __ksym by its kernel address at load time. This patch series extend those externs with their btf info. Note that btf support for __ksym must come with the kernel btf that has VARs encoded to work properly. The corresponding chagnes in pahole is available at [1] (with a fix at [2] for gcc 4.9+). The first 3 patches in this series add support for general kernel global variables, which include verifier checking (01/06), libpf support (02/06) and selftests for getting typed ksym extern's kernel address (03/06). The next 3 patches extends that capability further by introducing helpers bpf_per_cpu_ptr() and bpf_this_cpu_ptr(), which allows accessing kernel percpu variables correctly (04/06 and 05/06). The tests of this feature were performed against pahole that is extended with [1] and [2]. For kernel BTF that does not have VARs encoded, the selftests will be skipped. [1] https://git.kernel.org/pub/scm/devel/pahole/pahole.git/commit/?id=f3d9054ba8ff1df0fc44e507e3a01c0964cabd42 [2] https://www.spinics.net/lists/dwarves/msg00451.html ==================== Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/btf.c25
-rw-r--r--kernel/bpf/helpers.c32
-rw-r--r--kernel/bpf/verifier.c190
-rw-r--r--kernel/trace/bpf_trace.c4
4 files changed, 212 insertions, 39 deletions
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 4d0ee7839fdb..ed7d02e8bc93 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -188,11 +188,6 @@
i < btf_type_vlen(struct_type); \
i++, member++)
-#define for_each_vsi(i, struct_type, member) \
- for (i = 0, member = btf_type_var_secinfo(struct_type); \
- i < btf_type_vlen(struct_type); \
- i++, member++)
-
#define for_each_vsi_from(i, from, struct_type, member) \
for (i = from, member = btf_type_var_secinfo(struct_type) + from; \
i < btf_type_vlen(struct_type); \
@@ -440,16 +435,6 @@ static bool btf_type_nosize_or_null(const struct btf_type *t)
return !t || btf_type_nosize(t);
}
-/* union is only a special case of struct:
- * all its offsetof(member) == 0
- */
-static bool btf_type_is_struct(const struct btf_type *t)
-{
- u8 kind = BTF_INFO_KIND(t->info);
-
- return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
-}
-
static bool __btf_type_is_struct(const struct btf_type *t)
{
return BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT;
@@ -460,11 +445,6 @@ static bool btf_type_is_array(const struct btf_type *t)
return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY;
}
-static bool btf_type_is_var(const struct btf_type *t)
-{
- return BTF_INFO_KIND(t->info) == BTF_KIND_VAR;
-}
-
static bool btf_type_is_datasec(const struct btf_type *t)
{
return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC;
@@ -613,11 +593,6 @@ static const struct btf_var *btf_type_var(const struct btf_type *t)
return (const struct btf_var *)(t + 1);
}
-static const struct btf_var_secinfo *btf_type_var_secinfo(const struct btf_type *t)
-{
- return (const struct btf_var_secinfo *)(t + 1);
-}
-
static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
{
return kind_ops[BTF_INFO_KIND(t->info)];
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index e825441781ab..25520f5eeaf6 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -623,6 +623,34 @@ const struct bpf_func_proto bpf_copy_from_user_proto = {
.arg3_type = ARG_ANYTHING,
};
+BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
+{
+ if (cpu >= nr_cpu_ids)
+ return (unsigned long)NULL;
+
+ return (unsigned long)per_cpu_ptr((const void __percpu *)ptr, cpu);
+}
+
+const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
+ .func = bpf_per_cpu_ptr,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL,
+ .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID,
+ .arg2_type = ARG_ANYTHING,
+};
+
+BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr)
+{
+ return (unsigned long)this_cpu_ptr((const void __percpu *)percpu_ptr);
+}
+
+const struct bpf_func_proto bpf_this_cpu_ptr_proto = {
+ .func = bpf_this_cpu_ptr,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_MEM_OR_BTF_ID,
+ .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID,
+};
+
const struct bpf_func_proto bpf_get_current_task_proto __weak;
const struct bpf_func_proto bpf_probe_read_user_proto __weak;
const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
@@ -689,6 +717,10 @@ bpf_base_func_proto(enum bpf_func_id func_id)
return &bpf_snprintf_btf_proto;
case BPF_FUNC_jiffies64:
return &bpf_jiffies64_proto;
+ case BPF_FUNC_bpf_per_cpu_ptr:
+ return &bpf_per_cpu_ptr_proto;
+ case BPF_FUNC_bpf_this_cpu_ptr:
+ return &bpf_this_cpu_ptr_proto;
default:
break;
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 015a1c074b6b..d9dbf271ebab 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -238,6 +238,8 @@ struct bpf_call_arg_meta {
u64 msize_max_value;
int ref_obj_id;
int func_id;
+ u32 btf_id;
+ u32 ret_btf_id;
};
struct btf *btf_vmlinux;
@@ -517,6 +519,7 @@ static const char * const reg_type_str[] = {
[PTR_TO_XDP_SOCK] = "xdp_sock",
[PTR_TO_BTF_ID] = "ptr_",
[PTR_TO_BTF_ID_OR_NULL] = "ptr_or_null_",
+ [PTR_TO_PERCPU_BTF_ID] = "percpu_ptr_",
[PTR_TO_MEM] = "mem",
[PTR_TO_MEM_OR_NULL] = "mem_or_null",
[PTR_TO_RDONLY_BUF] = "rdonly_buf",
@@ -583,7 +586,9 @@ static void print_verifier_state(struct bpf_verifier_env *env,
/* reg->off should be 0 for SCALAR_VALUE */
verbose(env, "%lld", reg->var_off.value + reg->off);
} else {
- if (t == PTR_TO_BTF_ID || t == PTR_TO_BTF_ID_OR_NULL)
+ if (t == PTR_TO_BTF_ID ||
+ t == PTR_TO_BTF_ID_OR_NULL ||
+ t == PTR_TO_PERCPU_BTF_ID)
verbose(env, "%s", kernel_type_name(reg->btf_id));
verbose(env, "(id=%d", reg->id);
if (reg_type_may_be_refcounted_or_null(t))
@@ -2204,6 +2209,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
case PTR_TO_RDONLY_BUF_OR_NULL:
case PTR_TO_RDWR_BUF:
case PTR_TO_RDWR_BUF_OR_NULL:
+ case PTR_TO_PERCPU_BTF_ID:
return true;
default:
return false;
@@ -4017,6 +4023,7 @@ static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM } };
static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
+static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } };
static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
[ARG_PTR_TO_MAP_KEY] = &map_key_value_types,
@@ -4042,6 +4049,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
[ARG_PTR_TO_ALLOC_MEM_OR_NULL] = &alloc_mem_types,
[ARG_PTR_TO_INT] = &int_ptr_types,
[ARG_PTR_TO_LONG] = &int_ptr_types,
+ [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types,
};
static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
@@ -4205,6 +4213,12 @@ skip_type_check:
err = check_helper_mem_access(env, regno,
meta->map_ptr->value_size, false,
meta);
+ } else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) {
+ if (!reg->btf_id) {
+ verbose(env, "Helper has invalid btf_id in R%d\n", regno);
+ return -EACCES;
+ }
+ meta->ret_btf_id = reg->btf_id;
} else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
if (meta->func_id == BPF_FUNC_spin_lock) {
if (process_spin_lock(env, regno, true))
@@ -5114,6 +5128,35 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
regs[BPF_REG_0].id = ++env->id_gen;
regs[BPF_REG_0].mem_size = meta.mem_size;
+ } else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL ||
+ fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID) {
+ const struct btf_type *t;
+
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+ t = btf_type_skip_modifiers(btf_vmlinux, meta.ret_btf_id, NULL);
+ if (!btf_type_is_struct(t)) {
+ u32 tsize;
+ const struct btf_type *ret;
+ const char *tname;
+
+ /* resolve the type size of ksym. */
+ ret = btf_resolve_size(btf_vmlinux, t, &tsize);
+ if (IS_ERR(ret)) {
+ tname = btf_name_by_offset(btf_vmlinux, t->name_off);
+ verbose(env, "unable to resolve the size of type '%s': %ld\n",
+ tname, PTR_ERR(ret));
+ return -EINVAL;
+ }
+ regs[BPF_REG_0].type =
+ fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ?
+ PTR_TO_MEM : PTR_TO_MEM_OR_NULL;
+ regs[BPF_REG_0].mem_size = tsize;
+ } else {
+ regs[BPF_REG_0].type =
+ fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ?
+ PTR_TO_BTF_ID : PTR_TO_BTF_ID_OR_NULL;
+ regs[BPF_REG_0].btf_id = meta.ret_btf_id;
+ }
} else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) {
int ret_btf_id;
@@ -7488,6 +7531,7 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
struct bpf_insn_aux_data *aux = cur_aux(env);
struct bpf_reg_state *regs = cur_regs(env);
+ struct bpf_reg_state *dst_reg;
struct bpf_map *map;
int err;
@@ -7504,25 +7548,45 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
if (err)
return err;
+ dst_reg = &regs[insn->dst_reg];
if (insn->src_reg == 0) {
u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
- regs[insn->dst_reg].type = SCALAR_VALUE;
+ dst_reg->type = SCALAR_VALUE;
__mark_reg_known(&regs[insn->dst_reg], imm);
return 0;
}
+ if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
+ mark_reg_known_zero(env, regs, insn->dst_reg);
+
+ dst_reg->type = aux->btf_var.reg_type;
+ switch (dst_reg->type) {
+ case PTR_TO_MEM:
+ dst_reg->mem_size = aux->btf_var.mem_size;
+ break;
+ case PTR_TO_BTF_ID:
+ case PTR_TO_PERCPU_BTF_ID:
+ dst_reg->btf_id = aux->btf_var.btf_id;
+ break;
+ default:
+ verbose(env, "bpf verifier is misconfigured\n");
+ return -EFAULT;
+ }
+ return 0;
+ }
+
map = env->used_maps[aux->map_index];
mark_reg_known_zero(env, regs, insn->dst_reg);
- regs[insn->dst_reg].map_ptr = map;
+ dst_reg->map_ptr = map;
if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) {
- regs[insn->dst_reg].type = PTR_TO_MAP_VALUE;
- regs[insn->dst_reg].off = aux->map_off;
+ dst_reg->type = PTR_TO_MAP_VALUE;
+ dst_reg->off = aux->map_off;
if (map_value_has_spin_lock(map))
- regs[insn->dst_reg].id = ++env->id_gen;
+ dst_reg->id = ++env->id_gen;
} else if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
- regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
+ dst_reg->type = CONST_PTR_TO_MAP;
} else {
verbose(env, "bpf verifier is misconfigured\n");
return -EINVAL;
@@ -9424,6 +9488,92 @@ process_bpf_exit:
return 0;
}
+/* replace pseudo btf_id with kernel symbol address */
+static int check_pseudo_btf_id(struct bpf_verifier_env *env,
+ struct bpf_insn *insn,
+ struct bpf_insn_aux_data *aux)
+{
+ u32 datasec_id, type, id = insn->imm;
+ const struct btf_var_secinfo *vsi;
+ const struct btf_type *datasec;
+ const struct btf_type *t;
+ const char *sym_name;
+ bool percpu = false;
+ u64 addr;
+ int i;
+
+ if (!btf_vmlinux) {
+ verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
+ return -EINVAL;
+ }
+
+ if (insn[1].imm != 0) {
+ verbose(env, "reserved field (insn[1].imm) is used in pseudo_btf_id ldimm64 insn.\n");
+ return -EINVAL;
+ }
+
+ t = btf_type_by_id(btf_vmlinux, id);
+ if (!t) {
+ verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
+ return -ENOENT;
+ }
+
+ if (!btf_type_is_var(t)) {
+ verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n",
+ id);
+ return -EINVAL;
+ }
+
+ sym_name = btf_name_by_offset(btf_vmlinux, t->name_off);
+ addr = kallsyms_lookup_name(sym_name);
+ if (!addr) {
+ verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
+ sym_name);
+ return -ENOENT;
+ }
+
+ datasec_id = btf_find_by_name_kind(btf_vmlinux, ".data..percpu",
+ BTF_KIND_DATASEC);
+ if (datasec_id > 0) {
+ datasec = btf_type_by_id(btf_vmlinux, datasec_id);
+ for_each_vsi(i, datasec, vsi) {
+ if (vsi->type == id) {
+ percpu = true;
+ break;
+ }
+ }
+ }
+
+ insn[0].imm = (u32)addr;
+ insn[1].imm = addr >> 32;
+
+ type = t->type;
+ t = btf_type_skip_modifiers(btf_vmlinux, type, NULL);
+ if (percpu) {
+ aux->btf_var.reg_type = PTR_TO_PERCPU_BTF_ID;
+ aux->btf_var.btf_id = type;
+ } else if (!btf_type_is_struct(t)) {
+ const struct btf_type *ret;
+ const char *tname;
+ u32 tsize;
+
+ /* resolve the type size of ksym. */
+ ret = btf_resolve_size(btf_vmlinux, t, &tsize);
+ if (IS_ERR(ret)) {
+ tname = btf_name_by_offset(btf_vmlinux, t->name_off);
+ verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
+ tname, PTR_ERR(ret));
+ return -EINVAL;
+ }
+ aux->btf_var.reg_type = PTR_TO_MEM;
+ aux->btf_var.mem_size = tsize;
+ } else {
+ aux->btf_var.reg_type = PTR_TO_BTF_ID;
+ aux->btf_var.btf_id = type;
+ }
+ return 0;
+}
+
static int check_map_prealloc(struct bpf_map *map)
{
return (map->map_type != BPF_MAP_TYPE_HASH &&
@@ -9534,10 +9684,14 @@ static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
}
-/* look for pseudo eBPF instructions that access map FDs and
- * replace them with actual map pointers
+/* find and rewrite pseudo imm in ld_imm64 instructions:
+ *
+ * 1. if it accesses map FD, replace it with actual map pointer.
+ * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
+ *
+ * NOTE: btf_vmlinux is required for converting pseudo btf_id.
*/
-static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
+static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
{
struct bpf_insn *insn = env->prog->insnsi;
int insn_cnt = env->prog->len;
@@ -9578,6 +9732,14 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
/* valid generic load 64-bit imm */
goto next_insn;
+ if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
+ aux = &env->insn_aux_data[i];
+ err = check_pseudo_btf_id(env, insn, aux);
+ if (err)
+ return err;
+ goto next_insn;
+ }
+
/* In final convert_pseudo_ld_imm64() step, this is
* converted into regular 64-bit imm load insn.
*/
@@ -11633,10 +11795,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
if (is_priv)
env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
- ret = replace_map_fd_with_map_ptr(env);
- if (ret < 0)
- goto skip_full_check;
-
if (bpf_prog_is_dev_bound(env->prog->aux)) {
ret = bpf_prog_offload_verifier_prep(env->prog);
if (ret)
@@ -11662,6 +11820,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
if (ret)
goto skip_full_check;
+ ret = resolve_pseudo_ldimm64(env);
+ if (ret < 0)
+ goto skip_full_check;
+
ret = check_cfg(env);
if (ret < 0)
goto skip_full_check;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index e118a83439c3..a136a6a63a71 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1327,6 +1327,10 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL;
case BPF_FUNC_snprintf_btf:
return &bpf_snprintf_btf_proto;
+ case BPF_FUNC_bpf_per_cpu_ptr:
+ return &bpf_per_cpu_ptr_proto;
+ case BPF_FUNC_bpf_this_cpu_ptr:
+ return &bpf_this_cpu_ptr_proto;
default:
return NULL;
}