From 6e22ab9da79343532cd3cde39df25e5a5478c692 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 25 Aug 2020 21:21:20 +0200 Subject: bpf: Add d_path helper Adding d_path helper function that returns full path for given 'struct path' object, which needs to be the kernel BTF 'path' object. The path is returned in buffer provided 'buf' of size 'sz' and is zero terminated. bpf_d_path(&file->f_path, buf, size); The helper calls directly d_path function, so there's only limited set of function it can be called from. Adding just very modest set for the start. Updating also bpf.h tools uapi header and adding 'path' to bpf_helpers_doc.py script. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20200825192124.710397-11-jolsa@kernel.org --- scripts/bpf_helpers_doc.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'scripts/bpf_helpers_doc.py') diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index 5bfa448b4704..08388173973f 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -432,6 +432,7 @@ class PrinterHelpers(Printer): 'struct __sk_buff', 'struct sk_msg_md', 'struct xdp_md', + 'struct path', ] known_types = { '...', @@ -472,6 +473,7 @@ class PrinterHelpers(Printer): 'struct tcp_request_sock', 'struct udp6_sock', 'struct task_struct', + 'struct path', } mapped_types = { 'u8': '__u8', -- cgit v1.2.3 From c4d0bfb45068d853a478b9067a95969b1886a30f Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Mon, 28 Sep 2020 12:31:05 +0100 Subject: bpf: Add bpf_snprintf_btf helper A helper is added to support tracing kernel type information in BPF using the BPF Type Format (BTF). Its signature is long bpf_snprintf_btf(char *str, u32 str_size, struct btf_ptr *ptr, u32 btf_ptr_size, u64 flags); struct btf_ptr * specifies - a pointer to the data to be traced - the BTF id of the type of data pointed to - a flags field is provided for future use; these flags are not to be confused with the BTF_F_* flags below that control how the btf_ptr is displayed; the flags member of the struct btf_ptr may be used to disambiguate types in kernel versus module BTF, etc; the main distinction is the flags relate to the type and information needed in identifying it; not how it is displayed. For example a BPF program with a struct sk_buff *skb could do the following: static struct btf_ptr b = { }; b.ptr = skb; b.type_id = __builtin_btf_type_id(struct sk_buff, 1); bpf_snprintf_btf(str, sizeof(str), &b, sizeof(b), 0, 0); Default output looks like this: (struct sk_buff){ .transport_header = (__u16)65535, .mac_header = (__u16)65535, .end = (sk_buff_data_t)192, .head = (unsigned char *)0x000000007524fd8b, .data = (unsigned char *)0x000000007524fd8b, .truesize = (unsigned int)768, .users = (refcount_t){ .refs = (atomic_t){ .counter = (int)1, }, }, } Flags modifying display are as follows: - BTF_F_COMPACT: no formatting around type information - BTF_F_NONAME: no struct/union member names/types - BTF_F_PTR_RAW: show raw (unobfuscated) pointer values; equivalent to %px. - BTF_F_ZERO: show zero-valued struct/union members; they are not displayed by default Signed-off-by: Alan Maguire Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/1601292670-1616-4-git-send-email-alan.maguire@oracle.com --- include/linux/bpf.h | 1 + include/linux/btf.h | 9 +++--- include/uapi/linux/bpf.h | 67 ++++++++++++++++++++++++++++++++++++++++++ kernel/bpf/core.c | 1 + kernel/bpf/helpers.c | 4 +++ kernel/trace/bpf_trace.c | 65 ++++++++++++++++++++++++++++++++++++++++ scripts/bpf_helpers_doc.py | 2 ++ tools/include/uapi/linux/bpf.h | 67 ++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 212 insertions(+), 4 deletions(-) (limited to 'scripts/bpf_helpers_doc.py') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e620a4b1290f..768b533ba48e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1822,6 +1822,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto; extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto; extern const struct bpf_func_proto bpf_copy_from_user_proto; +extern const struct bpf_func_proto bpf_snprintf_btf_proto; const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/linux/btf.h b/include/linux/btf.h index d0f5d3c9ec3d..3e5cdc2ba963 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -6,6 +6,7 @@ #include #include +#include #define BTF_TYPE_EMIT(type) ((void)(type *)0) @@ -59,10 +60,10 @@ const struct btf_type *btf_type_id_size(const struct btf *btf, * - BTF_SHOW_UNSAFE: skip use of bpf_probe_read() to safely read * data before displaying it. */ -#define BTF_SHOW_COMPACT (1ULL << 0) -#define BTF_SHOW_NONAME (1ULL << 1) -#define BTF_SHOW_PTR_RAW (1ULL << 2) -#define BTF_SHOW_ZERO (1ULL << 3) +#define BTF_SHOW_COMPACT BTF_F_COMPACT +#define BTF_SHOW_NONAME BTF_F_NONAME +#define BTF_SHOW_PTR_RAW BTF_F_PTR_RAW +#define BTF_SHOW_ZERO BTF_F_ZERO #define BTF_SHOW_UNSAFE (1ULL << 4) void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj, diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 82522f05c021..cca9eb1b13e5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3594,6 +3594,42 @@ union bpf_attr { * the data in *dst*. This is a wrapper of **copy_from_user**\ (). * Return * 0 on success, or a negative error in case of failure. + * + * long bpf_snprintf_btf(char *str, u32 str_size, struct btf_ptr *ptr, u32 btf_ptr_size, u64 flags) + * Description + * Use BTF to store a string representation of *ptr*->ptr in *str*, + * using *ptr*->type_id. This value should specify the type + * that *ptr*->ptr points to. LLVM __builtin_btf_type_id(type, 1) + * can be used to look up vmlinux BTF type ids. Traversing the + * data structure using BTF, the type information and values are + * stored in the first *str_size* - 1 bytes of *str*. Safe copy of + * the pointer data is carried out to avoid kernel crashes during + * operation. Smaller types can use string space on the stack; + * larger programs can use map data to store the string + * representation. + * + * The string can be subsequently shared with userspace via + * bpf_perf_event_output() or ring buffer interfaces. + * bpf_trace_printk() is to be avoided as it places too small + * a limit on string size to be useful. + * + * *flags* is a combination of + * + * **BTF_F_COMPACT** + * no formatting around type information + * **BTF_F_NONAME** + * no struct/union member names/types + * **BTF_F_PTR_RAW** + * show raw (unobfuscated) pointer values; + * equivalent to printk specifier %px. + * **BTF_F_ZERO** + * show zero-valued struct/union members; they + * are not displayed by default + * + * Return + * The number of bytes that were written (or would have been + * written if output had to be truncated due to string size), + * or a negative error in cases of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3745,6 +3781,7 @@ union bpf_attr { FN(inode_storage_delete), \ FN(d_path), \ FN(copy_from_user), \ + FN(snprintf_btf), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -4853,4 +4890,34 @@ struct bpf_sk_lookup { __u32 local_port; /* Host byte order */ }; +/* + * struct btf_ptr is used for typed pointer representation; the + * type id is used to render the pointer data as the appropriate type + * via the bpf_snprintf_btf() helper described above. A flags field - + * potentially to specify additional details about the BTF pointer + * (rather than its mode of display) - is included for future use. + * Display flags - BTF_F_* - are passed to bpf_snprintf_btf separately. + */ +struct btf_ptr { + void *ptr; + __u32 type_id; + __u32 flags; /* BTF ptr flags; unused at present. */ +}; + +/* + * Flags to control bpf_snprintf_btf() behaviour. + * - BTF_F_COMPACT: no formatting around type information + * - BTF_F_NONAME: no struct/union member names/types + * - BTF_F_PTR_RAW: show raw (unobfuscated) pointer values; + * equivalent to %px. + * - BTF_F_ZERO: show zero-valued struct/union members; they + * are not displayed by default + */ +enum { + BTF_F_COMPACT = (1ULL << 0), + BTF_F_NONAME = (1ULL << 1), + BTF_F_PTR_RAW = (1ULL << 2), + BTF_F_ZERO = (1ULL << 3), +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index c4811b139caa..403fb2341a86 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2216,6 +2216,7 @@ const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak; const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto __weak; const struct bpf_func_proto bpf_get_local_storage_proto __weak; const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto __weak; +const struct bpf_func_proto bpf_snprintf_btf_proto __weak; const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void) { diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 5cc7425ee476..e825441781ab 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -683,6 +683,10 @@ bpf_base_func_proto(enum bpf_func_id func_id) if (!perfmon_capable()) return NULL; return bpf_get_trace_printk_proto(); + case BPF_FUNC_snprintf_btf: + if (!perfmon_capable()) + return NULL; + return &bpf_snprintf_btf_proto; case BPF_FUNC_jiffies64: return &bpf_jiffies64_proto; default: diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 2834866d379a..140e1be9dab6 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -16,6 +17,9 @@ #include #include +#include +#include + #include #include "trace_probe.h" @@ -1147,6 +1151,65 @@ static const struct bpf_func_proto bpf_d_path_proto = { .allowed = bpf_d_path_allowed, }; +#define BTF_F_ALL (BTF_F_COMPACT | BTF_F_NONAME | \ + BTF_F_PTR_RAW | BTF_F_ZERO) + +static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size, + u64 flags, const struct btf **btf, + s32 *btf_id) +{ + const struct btf_type *t; + + if (unlikely(flags & ~(BTF_F_ALL))) + return -EINVAL; + + if (btf_ptr_size != sizeof(struct btf_ptr)) + return -EINVAL; + + *btf = bpf_get_btf_vmlinux(); + + if (IS_ERR_OR_NULL(*btf)) + return PTR_ERR(*btf); + + if (ptr->type_id > 0) + *btf_id = ptr->type_id; + else + return -EINVAL; + + if (*btf_id > 0) + t = btf_type_by_id(*btf, *btf_id); + if (*btf_id <= 0 || !t) + return -ENOENT; + + return 0; +} + +BPF_CALL_5(bpf_snprintf_btf, char *, str, u32, str_size, struct btf_ptr *, ptr, + u32, btf_ptr_size, u64, flags) +{ + const struct btf *btf; + s32 btf_id; + int ret; + + ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &btf, &btf_id); + if (ret) + return ret; + + return btf_type_snprintf_show(btf, btf_id, ptr->ptr, str, str_size, + flags); +} + +const struct bpf_func_proto bpf_snprintf_btf_proto = { + .func = bpf_snprintf_btf, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_MEM, + .arg2_type = ARG_CONST_SIZE, + .arg3_type = ARG_PTR_TO_MEM, + .arg4_type = ARG_CONST_SIZE, + .arg5_type = ARG_ANYTHING, +}; + const struct bpf_func_proto * bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -1233,6 +1296,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_task_stack_proto; case BPF_FUNC_copy_from_user: return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL; + case BPF_FUNC_snprintf_btf: + return &bpf_snprintf_btf_proto; default: return NULL; } diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index 08388173973f..7d86fdd190be 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -433,6 +433,7 @@ class PrinterHelpers(Printer): 'struct sk_msg_md', 'struct xdp_md', 'struct path', + 'struct btf_ptr', ] known_types = { '...', @@ -474,6 +475,7 @@ class PrinterHelpers(Printer): 'struct udp6_sock', 'struct task_struct', 'struct path', + 'struct btf_ptr', } mapped_types = { 'u8': '__u8', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 82522f05c021..cca9eb1b13e5 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3594,6 +3594,42 @@ union bpf_attr { * the data in *dst*. This is a wrapper of **copy_from_user**\ (). * Return * 0 on success, or a negative error in case of failure. + * + * long bpf_snprintf_btf(char *str, u32 str_size, struct btf_ptr *ptr, u32 btf_ptr_size, u64 flags) + * Description + * Use BTF to store a string representation of *ptr*->ptr in *str*, + * using *ptr*->type_id. This value should specify the type + * that *ptr*->ptr points to. LLVM __builtin_btf_type_id(type, 1) + * can be used to look up vmlinux BTF type ids. Traversing the + * data structure using BTF, the type information and values are + * stored in the first *str_size* - 1 bytes of *str*. Safe copy of + * the pointer data is carried out to avoid kernel crashes during + * operation. Smaller types can use string space on the stack; + * larger programs can use map data to store the string + * representation. + * + * The string can be subsequently shared with userspace via + * bpf_perf_event_output() or ring buffer interfaces. + * bpf_trace_printk() is to be avoided as it places too small + * a limit on string size to be useful. + * + * *flags* is a combination of + * + * **BTF_F_COMPACT** + * no formatting around type information + * **BTF_F_NONAME** + * no struct/union member names/types + * **BTF_F_PTR_RAW** + * show raw (unobfuscated) pointer values; + * equivalent to printk specifier %px. + * **BTF_F_ZERO** + * show zero-valued struct/union members; they + * are not displayed by default + * + * Return + * The number of bytes that were written (or would have been + * written if output had to be truncated due to string size), + * or a negative error in cases of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3745,6 +3781,7 @@ union bpf_attr { FN(inode_storage_delete), \ FN(d_path), \ FN(copy_from_user), \ + FN(snprintf_btf), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -4853,4 +4890,34 @@ struct bpf_sk_lookup { __u32 local_port; /* Host byte order */ }; +/* + * struct btf_ptr is used for typed pointer representation; the + * type id is used to render the pointer data as the appropriate type + * via the bpf_snprintf_btf() helper described above. A flags field - + * potentially to specify additional details about the BTF pointer + * (rather than its mode of display) - is included for future use. + * Display flags - BTF_F_* - are passed to bpf_snprintf_btf separately. + */ +struct btf_ptr { + void *ptr; + __u32 type_id; + __u32 flags; /* BTF ptr flags; unused at present. */ +}; + +/* + * Flags to control bpf_snprintf_btf() behaviour. + * - BTF_F_COMPACT: no formatting around type information + * - BTF_F_NONAME: no struct/union member names/types + * - BTF_F_PTR_RAW: show raw (unobfuscated) pointer values; + * equivalent to %px. + * - BTF_F_ZERO: show zero-valued struct/union members; they + * are not displayed by default + */ +enum { + BTF_F_COMPACT = (1ULL << 0), + BTF_F_NONAME = (1ULL << 1), + BTF_F_PTR_RAW = (1ULL << 2), + BTF_F_ZERO = (1ULL << 3), +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From ba452c9e996d8a4c347b32805f91abb70de5de7e Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Tue, 20 Oct 2020 23:25:56 +0200 Subject: bpf: Fix bpf_redirect_neigh helper api to support supplying nexthop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Based on the discussion in [0], update the bpf_redirect_neigh() helper to accept an optional parameter specifying the nexthop information. This makes it possible to combine bpf_fib_lookup() and bpf_redirect_neigh() without incurring a duplicate FIB lookup - since the FIB lookup helper will return the nexthop information even if no neighbour is present, this can simply be passed on to bpf_redirect_neigh() if bpf_fib_lookup() returns BPF_FIB_LKUP_RET_NO_NEIGH. Thus fix & extend it before helper API is frozen. [0] https://lore.kernel.org/bpf/393e17fc-d187-3a8d-2f0d-a627c7c63fca@iogearbox.net/ Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Daniel Borkmann Reviewed-by: David Ahern Link: https://lore.kernel.org/bpf/160322915615.32199.1187570224032024535.stgit@toke.dk --- include/linux/filter.h | 9 +++ include/uapi/linux/bpf.h | 22 ++++-- net/core/filter.c | 158 ++++++++++++++++++++++++++--------------- scripts/bpf_helpers_doc.py | 1 + tools/include/uapi/linux/bpf.h | 22 ++++-- 5 files changed, 145 insertions(+), 67 deletions(-) (limited to 'scripts/bpf_helpers_doc.py') diff --git a/include/linux/filter.h b/include/linux/filter.h index 20fc24c9779a..72d62cbc1578 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -607,12 +607,21 @@ struct bpf_skb_data_end { void *data_end; }; +struct bpf_nh_params { + u32 nh_family; + union { + u32 ipv4_nh; + struct in6_addr ipv6_nh; + }; +}; + struct bpf_redirect_info { u32 flags; u32 tgt_index; void *tgt_value; struct bpf_map *map; u32 kern_flags; + struct bpf_nh_params nh; }; DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bf5a99d803e4..e6ceac3f7d62 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3677,15 +3677,19 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * long bpf_redirect_neigh(u32 ifindex, u64 flags) + * long bpf_redirect_neigh(u32 ifindex, struct bpf_redir_neigh *params, int plen, u64 flags) * Description * Redirect the packet to another net device of index *ifindex* * and fill in L2 addresses from neighboring subsystem. This helper * is somewhat similar to **bpf_redirect**\ (), except that it * populates L2 addresses as well, meaning, internally, the helper - * performs a FIB lookup based on the skb's networking header to - * get the address of the next hop and then relies on the neighbor - * lookup for the L2 address of the nexthop. + * relies on the neighbor lookup for the L2 address of the nexthop. + * + * The helper will perform a FIB lookup based on the skb's + * networking header to get the address of the next hop, unless + * this is supplied by the caller in the *params* argument. The + * *plen* argument indicates the len of *params* and should be set + * to 0 if *params* is NULL. * * The *flags* argument is reserved and must be 0. The helper is * currently only supported for tc BPF program types, and enabled @@ -4906,6 +4910,16 @@ struct bpf_fib_lookup { __u8 dmac[6]; /* ETH_ALEN */ }; +struct bpf_redir_neigh { + /* network family for lookup (AF_INET, AF_INET6) */ + __u32 nh_family; + /* network address of nexthop; skips fib lookup to find gateway */ + union { + __be32 ipv4_nh; + __u32 ipv6_nh[4]; /* in6_addr; network order */ + }; +}; + enum bpf_task_fd_type { BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */ BPF_FD_TYPE_TRACEPOINT, /* tp name */ diff --git a/net/core/filter.c b/net/core/filter.c index c5e2a1c5fd8d..6d0fa65a4a46 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2165,12 +2165,12 @@ static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev, } #if IS_ENABLED(CONFIG_IPV6) -static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb) +static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb, + struct net_device *dev, struct bpf_nh_params *nh) { - struct dst_entry *dst = skb_dst(skb); - struct net_device *dev = dst->dev; u32 hh_len = LL_RESERVED_SPACE(dev); const struct in6_addr *nexthop; + struct dst_entry *dst = NULL; struct neighbour *neigh; if (dev_xmit_recursion()) { @@ -2196,8 +2196,13 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb) } rcu_read_lock_bh(); - nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst), - &ipv6_hdr(skb)->daddr); + if (!nh) { + dst = skb_dst(skb); + nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst), + &ipv6_hdr(skb)->daddr); + } else { + nexthop = &nh->ipv6_nh; + } neigh = ip_neigh_gw6(dev, nexthop); if (likely(!IS_ERR(neigh))) { int ret; @@ -2210,36 +2215,43 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb) return ret; } rcu_read_unlock_bh(); - IP6_INC_STATS(dev_net(dst->dev), - ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); + if (dst) + IP6_INC_STATS(dev_net(dst->dev), + ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); out_drop: kfree_skb(skb); return -ENETDOWN; } -static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev) +static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev, + struct bpf_nh_params *nh) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); struct net *net = dev_net(dev); int err, ret = NET_XMIT_DROP; - struct dst_entry *dst; - struct flowi6 fl6 = { - .flowi6_flags = FLOWI_FLAG_ANYSRC, - .flowi6_mark = skb->mark, - .flowlabel = ip6_flowinfo(ip6h), - .flowi6_oif = dev->ifindex, - .flowi6_proto = ip6h->nexthdr, - .daddr = ip6h->daddr, - .saddr = ip6h->saddr, - }; - dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL); - if (IS_ERR(dst)) - goto out_drop; + if (!nh) { + struct dst_entry *dst; + struct flowi6 fl6 = { + .flowi6_flags = FLOWI_FLAG_ANYSRC, + .flowi6_mark = skb->mark, + .flowlabel = ip6_flowinfo(ip6h), + .flowi6_oif = dev->ifindex, + .flowi6_proto = ip6h->nexthdr, + .daddr = ip6h->daddr, + .saddr = ip6h->saddr, + }; + + dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL); + if (IS_ERR(dst)) + goto out_drop; - skb_dst_set(skb, dst); + skb_dst_set(skb, dst); + } else if (nh->nh_family != AF_INET6) { + goto out_drop; + } - err = bpf_out_neigh_v6(net, skb); + err = bpf_out_neigh_v6(net, skb, dev, nh); if (unlikely(net_xmit_eval(err))) dev->stats.tx_errors++; else @@ -2252,7 +2264,8 @@ out_xmit: return ret; } #else -static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev) +static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev, + struct bpf_nh_params *nh) { kfree_skb(skb); return NET_XMIT_DROP; @@ -2260,11 +2273,9 @@ static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev) #endif /* CONFIG_IPV6 */ #if IS_ENABLED(CONFIG_INET) -static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb) +static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb, + struct net_device *dev, struct bpf_nh_params *nh) { - struct dst_entry *dst = skb_dst(skb); - struct rtable *rt = container_of(dst, struct rtable, dst); - struct net_device *dev = dst->dev; u32 hh_len = LL_RESERVED_SPACE(dev); struct neighbour *neigh; bool is_v6gw = false; @@ -2292,7 +2303,21 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb) } rcu_read_lock_bh(); - neigh = ip_neigh_for_gw(rt, skb, &is_v6gw); + if (!nh) { + struct dst_entry *dst = skb_dst(skb); + struct rtable *rt = container_of(dst, struct rtable, dst); + + neigh = ip_neigh_for_gw(rt, skb, &is_v6gw); + } else if (nh->nh_family == AF_INET6) { + neigh = ip_neigh_gw6(dev, &nh->ipv6_nh); + is_v6gw = true; + } else if (nh->nh_family == AF_INET) { + neigh = ip_neigh_gw4(dev, nh->ipv4_nh); + } else { + rcu_read_unlock_bh(); + goto out_drop; + } + if (likely(!IS_ERR(neigh))) { int ret; @@ -2309,33 +2334,37 @@ out_drop: return -ENETDOWN; } -static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev) +static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev, + struct bpf_nh_params *nh) { const struct iphdr *ip4h = ip_hdr(skb); struct net *net = dev_net(dev); int err, ret = NET_XMIT_DROP; - struct rtable *rt; - struct flowi4 fl4 = { - .flowi4_flags = FLOWI_FLAG_ANYSRC, - .flowi4_mark = skb->mark, - .flowi4_tos = RT_TOS(ip4h->tos), - .flowi4_oif = dev->ifindex, - .flowi4_proto = ip4h->protocol, - .daddr = ip4h->daddr, - .saddr = ip4h->saddr, - }; - rt = ip_route_output_flow(net, &fl4, NULL); - if (IS_ERR(rt)) - goto out_drop; - if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) { - ip_rt_put(rt); - goto out_drop; - } + if (!nh) { + struct flowi4 fl4 = { + .flowi4_flags = FLOWI_FLAG_ANYSRC, + .flowi4_mark = skb->mark, + .flowi4_tos = RT_TOS(ip4h->tos), + .flowi4_oif = dev->ifindex, + .flowi4_proto = ip4h->protocol, + .daddr = ip4h->daddr, + .saddr = ip4h->saddr, + }; + struct rtable *rt; + + rt = ip_route_output_flow(net, &fl4, NULL); + if (IS_ERR(rt)) + goto out_drop; + if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) { + ip_rt_put(rt); + goto out_drop; + } - skb_dst_set(skb, &rt->dst); + skb_dst_set(skb, &rt->dst); + } - err = bpf_out_neigh_v4(net, skb); + err = bpf_out_neigh_v4(net, skb, dev, nh); if (unlikely(net_xmit_eval(err))) dev->stats.tx_errors++; else @@ -2348,14 +2377,16 @@ out_xmit: return ret; } #else -static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev) +static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev, + struct bpf_nh_params *nh) { kfree_skb(skb); return NET_XMIT_DROP; } #endif /* CONFIG_INET */ -static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev) +static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev, + struct bpf_nh_params *nh) { struct ethhdr *ethh = eth_hdr(skb); @@ -2370,9 +2401,9 @@ static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev) skb_reset_network_header(skb); if (skb->protocol == htons(ETH_P_IP)) - return __bpf_redirect_neigh_v4(skb, dev); + return __bpf_redirect_neigh_v4(skb, dev, nh); else if (skb->protocol == htons(ETH_P_IPV6)) - return __bpf_redirect_neigh_v6(skb, dev); + return __bpf_redirect_neigh_v6(skb, dev, nh); out: kfree_skb(skb); return -ENOTSUPP; @@ -2382,7 +2413,8 @@ out: enum { BPF_F_NEIGH = (1ULL << 1), BPF_F_PEER = (1ULL << 2), -#define BPF_F_REDIRECT_INTERNAL (BPF_F_NEIGH | BPF_F_PEER) + BPF_F_NEXTHOP = (1ULL << 3), +#define BPF_F_REDIRECT_INTERNAL (BPF_F_NEIGH | BPF_F_PEER | BPF_F_NEXTHOP) }; BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags) @@ -2455,7 +2487,8 @@ int skb_do_redirect(struct sk_buff *skb) return -EAGAIN; } return flags & BPF_F_NEIGH ? - __bpf_redirect_neigh(skb, dev) : + __bpf_redirect_neigh(skb, dev, flags & BPF_F_NEXTHOP ? + &ri->nh : NULL) : __bpf_redirect(skb, dev, flags); out_drop: kfree_skb(skb); @@ -2504,16 +2537,21 @@ static const struct bpf_func_proto bpf_redirect_peer_proto = { .arg2_type = ARG_ANYTHING, }; -BPF_CALL_2(bpf_redirect_neigh, u32, ifindex, u64, flags) +BPF_CALL_4(bpf_redirect_neigh, u32, ifindex, struct bpf_redir_neigh *, params, + int, plen, u64, flags) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); - if (unlikely(flags)) + if (unlikely((plen && plen < sizeof(*params)) || flags)) return TC_ACT_SHOT; - ri->flags = BPF_F_NEIGH; + ri->flags = BPF_F_NEIGH | (plen ? BPF_F_NEXTHOP : 0); ri->tgt_index = ifindex; + BUILD_BUG_ON(sizeof(struct bpf_redir_neigh) != sizeof(struct bpf_nh_params)); + if (plen) + memcpy(&ri->nh, params, sizeof(ri->nh)); + return TC_ACT_REDIRECT; } @@ -2522,7 +2560,9 @@ static const struct bpf_func_proto bpf_redirect_neigh_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_ANYTHING, - .arg2_type = ARG_ANYTHING, + .arg2_type = ARG_PTR_TO_MEM_OR_NULL, + .arg3_type = ARG_CONST_SIZE_OR_ZERO, + .arg4_type = ARG_ANYTHING, }; BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg *, msg, u32, bytes) diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index 7d86fdd190be..6769caae142f 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -453,6 +453,7 @@ class PrinterHelpers(Printer): 'struct bpf_perf_event_data', 'struct bpf_perf_event_value', 'struct bpf_pidns_info', + 'struct bpf_redir_neigh', 'struct bpf_sk_lookup', 'struct bpf_sock', 'struct bpf_sock_addr', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index bf5a99d803e4..e6ceac3f7d62 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3677,15 +3677,19 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * long bpf_redirect_neigh(u32 ifindex, u64 flags) + * long bpf_redirect_neigh(u32 ifindex, struct bpf_redir_neigh *params, int plen, u64 flags) * Description * Redirect the packet to another net device of index *ifindex* * and fill in L2 addresses from neighboring subsystem. This helper * is somewhat similar to **bpf_redirect**\ (), except that it * populates L2 addresses as well, meaning, internally, the helper - * performs a FIB lookup based on the skb's networking header to - * get the address of the next hop and then relies on the neighbor - * lookup for the L2 address of the nexthop. + * relies on the neighbor lookup for the L2 address of the nexthop. + * + * The helper will perform a FIB lookup based on the skb's + * networking header to get the address of the next hop, unless + * this is supplied by the caller in the *params* argument. The + * *plen* argument indicates the len of *params* and should be set + * to 0 if *params* is NULL. * * The *flags* argument is reserved and must be 0. The helper is * currently only supported for tc BPF program types, and enabled @@ -4906,6 +4910,16 @@ struct bpf_fib_lookup { __u8 dmac[6]; /* ETH_ALEN */ }; +struct bpf_redir_neigh { + /* network family for lookup (AF_INET, AF_INET6) */ + __u32 nh_family; + /* network address of nexthop; skips fib lookup to find gateway */ + union { + __be32 ipv4_nh; + __u32 ipv6_nh[4]; /* in6_addr; network order */ + }; +}; + enum bpf_task_fd_type { BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */ BPF_FD_TYPE_TRACEPOINT, /* tp name */ -- cgit v1.2.3