From 1c4cff0cf55011792125b6041bc4e9713e46240f Mon Sep 17 00:00:00 2001 From: Ignacy Gawędzki Date: Fri, 13 Feb 2015 14:47:05 -0800 Subject: gen_stats.c: Duplicate xstats buffer for later use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The gnet_stats_copy_app() function gets called, more often than not, with its second argument a pointer to an automatic variable in the caller's stack. Therefore, to avoid copying garbage afterwards when calling gnet_stats_finish_copy(), this data is better copied to a dynamically allocated memory that gets freed after use. [xiyou.wangcong@gmail.com: remove a useless kfree()] Signed-off-by: Ignacy Gawędzki Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/core/gen_stats.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index 0c08062d1796..1e2f46a69d50 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -32,6 +32,9 @@ gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size) return 0; nla_put_failure: + kfree(d->xstats); + d->xstats = NULL; + d->xstats_len = 0; spin_unlock_bh(d->lock); return -1; } @@ -305,7 +308,9 @@ int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len) { if (d->compat_xstats) { - d->xstats = st; + d->xstats = kmemdup(st, len, GFP_ATOMIC); + if (!d->xstats) + goto err_out; d->xstats_len = len; } @@ -313,6 +318,11 @@ gnet_stats_copy_app(struct gnet_dump *d, void *st, int len) return gnet_stats_copy(d, TCA_STATS_APP, st, len); return 0; + +err_out: + d->xstats_len = 0; + spin_unlock_bh(d->lock); + return -1; } EXPORT_SYMBOL(gnet_stats_copy_app); @@ -345,6 +355,9 @@ gnet_stats_finish_copy(struct gnet_dump *d) return -1; } + kfree(d->xstats); + d->xstats = NULL; + d->xstats_len = 0; spin_unlock_bh(d->lock); return 0; } -- cgit v1.2.3 From 997d5c3f4427f38562cbe207ce05bb25fdcb993b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Feb 2015 05:47:55 -0800 Subject: sock: sock_dequeue_err_skb() needs hard irq safety Non NAPI drivers can call skb_tstamp_tx() and then sock_queue_err_skb() from hard IRQ context. Therefore, sock_dequeue_err_skb() needs to block hard irq or corruptions or hangs can happen. Signed-off-by: Eric Dumazet Fixes: 364a9e93243d1 ("sock: deduplicate errqueue dequeue") Fixes: cb820f8e4b7f7 ("net: Provide a generic socket error queue delivery method for Tx time stamps.") Signed-off-by: David S. Miller --- net/core/skbuff.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net/core') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 88c613eab142..f80507823531 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3621,13 +3621,14 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk) { struct sk_buff_head *q = &sk->sk_error_queue; struct sk_buff *skb, *skb_next; + unsigned long flags; int err = 0; - spin_lock_bh(&q->lock); + spin_lock_irqsave(&q->lock, flags); skb = __skb_dequeue(q); if (skb && (skb_next = skb_peek(q))) err = SKB_EXT_ERR(skb_next)->ee.ee_errno; - spin_unlock_bh(&q->lock); + spin_unlock_irqrestore(&q->lock, flags); sk->sk_err = err; if (err) -- cgit v1.2.3 From 65e9256c4ec569ed62bb89023daab7b72368b89f Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Fri, 20 Feb 2015 21:34:58 +0200 Subject: ethtool: Add hw-switch-offload to netdev_features_strings. commit aafb3e98b279 (netdev: introduce new NETIF_F_HW_SWITCH_OFFLOAD feature flag for switch device offloads) add a new feature without adding it to netdev_features_strings array; this patch fixes this. Signed-off-by: Rami Rosen Signed-off-by: David S. Miller --- net/core/ethtool.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/core') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 91f74f3eb204..aa378ecef186 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -98,6 +98,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_RXALL_BIT] = "rx-all", [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload", [NETIF_F_BUSY_POLL_BIT] = "busy-poll", + [NETIF_F_HW_SWITCH_OFFLOAD_BIT] = "hw-switch-offload", }; static const char -- cgit v1.2.3 From a4176a9391868bfa87705bcd2e3b49e9b9dd2996 Mon Sep 17 00:00:00 2001 From: Matthew Thode Date: Tue, 17 Feb 2015 18:31:57 -0600 Subject: net: reject creation of netdev names with colons colons are used as a separator in netdev device lookup in dev_ioctl.c Specific functions are SIOCGIFTXQLEN SIOCETHTOOL SIOCSIFNAME Signed-off-by: Matthew Thode Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 8f9710c62e20..962ee9d71964 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -946,7 +946,7 @@ bool dev_valid_name(const char *name) return false; while (*name) { - if (*name == '/' || isspace(*name)) + if (*name == '/' || *name == ':' || isspace(*name)) return false; name++; } -- cgit v1.2.3 From 52d6c8c6ca125872459054daa70f2f1c698c8e75 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 22 Feb 2015 17:03:41 -0800 Subject: net: pktgen: disable xmit_clone on virtual devices Trying to use burst capability (aka xmit_more) on a virtual device like bonding is not supported. For example, skb might be queued multiple times on a qdisc, with various list corruptions. Fixes: 38b2cf2982dc ("net: pktgen: packet bursting via skb->xmit_more") Signed-off-by: Eric Dumazet Cc: Alexei Starovoitov Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/pktgen.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/core') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index b4899f5b7388..508155b283dd 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1134,6 +1134,9 @@ static ssize_t pktgen_if_write(struct file *file, return len; i += len; + if ((value > 1) && + (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING))) + return -ENOTSUPP; pkt_dev->burst = value < 1 ? 1 : value; sprintf(pg_result, "OK: burst=%d", pkt_dev->burst); return count; -- cgit v1.2.3 From 4e10fd5b4a7f4100007147558c304da3e73b25cf Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Tue, 24 Feb 2015 14:14:35 -0500 Subject: rtnetlink: avoid 0 sized arrays Arrays (when not in a struct) "shall have a value greater than zero". GCC complains when it's not the case here. Fixes: ba7d49b1f0 ("rtnetlink: provide api for getting and setting slave info") Signed-off-by: Sasha Levin Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/core') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index ab293a3066b3..1385de0fa080 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2012,8 +2012,8 @@ replay: } if (1) { - struct nlattr *attr[ops ? ops->maxtype + 1 : 0]; - struct nlattr *slave_attr[m_ops ? m_ops->slave_maxtype + 1 : 0]; + struct nlattr *attr[ops ? ops->maxtype + 1 : 1]; + struct nlattr *slave_attr[m_ops ? m_ops->slave_maxtype + 1 : 1]; struct nlattr **data = NULL; struct nlattr **slave_data = NULL; struct net *dest_net, *link_net = NULL; -- cgit v1.2.3 From 505ce4154ac86c250aa4a84a536dd9fc56479bb5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 26 Feb 2015 16:19:00 -0600 Subject: net: Verify permission to dest_net in newlink When applicable verify that the caller has permision to create a network device in another network namespace. This check is already present when moving a network device between network namespaces in setlink so all that is needed is to duplicate that check in newlink. This change almost backports cleanly, but there are context conflicts as the code that follows was added in v4.0-rc1 Fixes: b51642f6d77b net: Enable a userns root rtnl calls that are safe for unprivilged users Signed-off-by: "Eric W. Biederman" Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net/core') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1385de0fa080..b237959c7497 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2122,6 +2122,10 @@ replay: if (IS_ERR(dest_net)) return PTR_ERR(dest_net); + err = -EPERM; + if (!netlink_ns_capable(skb, dest_net->user_ns, CAP_NET_ADMIN)) + goto out; + if (tb[IFLA_LINK_NETNSID]) { int id = nla_get_s32(tb[IFLA_LINK_NETNSID]); -- cgit v1.2.3 From 06615bed60c1fb7c37adddb75bdc80da873b5edb Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 26 Feb 2015 16:20:07 -0600 Subject: net: Verify permission to link_net in newlink When applicable verify that the caller has permisson to the underlying network namespace for a newly created network device. Similary checks exist for the network namespace a network device will be created in. Fixes: 317f4810e45e ("rtnl: allow to create device with IFLA_LINK_NETNSID set") Signed-off-by: "Eric W. Biederman" Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/core') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index b237959c7497..2c49355d16c2 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2134,6 +2134,9 @@ replay: err = -EINVAL; goto out; } + err = -EPERM; + if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN)) + goto out; } dev = rtnl_create_link(link_net ? : dest_net, ifname, -- cgit v1.2.3 From cac5e65e8a7ea074f2626d2eaa53aa308452dec4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Feb 2015 09:42:50 -0800 Subject: net: do not use rcu in rtnl_dump_ifinfo() We did a failed attempt in the past to only use rcu in rtnl dump operations (commit e67f88dd12f6 "net: dont hold rtnl mutex during netlink dump callbacks") Now that dumps are holding RTNL anyway, there is no need to also use rcu locking, as it forbids any scheduling ability, like GFP_KERNEL allocations that controlling path should use instead of GFP_ATOMIC whenever possible. This should fix following splat Cong Wang reported : [ INFO: suspicious RCU usage. ] 3.19.0+ #805 Tainted: G W include/linux/rcupdate.h:538 Illegal context switch in RCU read-side critical section! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 0 2 locks held by ip/771: #0: (rtnl_mutex){+.+.+.}, at: [] netlink_dump+0x21/0x26c #1: (rcu_read_lock){......}, at: [] rcu_read_lock+0x0/0x6e stack backtrace: CPU: 3 PID: 771 Comm: ip Tainted: G W 3.19.0+ #805 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 0000000000000001 ffff8800d51e7718 ffffffff81a27457 0000000029e729e6 ffff8800d6108000 ffff8800d51e7748 ffffffff810b539b ffffffff820013dd 00000000000001c8 0000000000000000 ffff8800d7448088 ffff8800d51e7758 Call Trace: [] dump_stack+0x4c/0x65 [] lockdep_rcu_suspicious+0x107/0x110 [] rcu_preempt_sleep_check+0x45/0x47 [] ___might_sleep+0x1d/0x1cb [] __might_sleep+0x78/0x80 [] idr_alloc+0x45/0xd1 [] ? rcu_read_lock_held+0x3b/0x3d [] ? idr_for_each+0x53/0x101 [] alloc_netid+0x61/0x69 [] __peernet2id+0x79/0x8d [] peernet2id+0x13/0x1f [] rtnl_fill_ifinfo+0xa8d/0xc20 [] ? __lock_is_held+0x39/0x52 [] rtnl_dump_ifinfo+0x149/0x213 [] netlink_dump+0xef/0x26c [] netlink_recvmsg+0x17b/0x2c5 [] __sock_recvmsg+0x4e/0x59 [] sock_recvmsg+0x3f/0x51 [] ___sys_recvmsg+0xf6/0x1d9 [] ? handle_pte_fault+0x6e1/0xd3d [] ? native_sched_clock+0x35/0x37 [] ? sched_clock_local+0x12/0x72 [] ? sched_clock_cpu+0x9e/0xb7 [] ? rcu_read_lock_held+0x3b/0x3d [] ? __fcheck_files+0x4c/0x58 [] ? __fget_light+0x2d/0x52 [] __sys_recvmsg+0x42/0x60 [] SyS_recvmsg+0x12/0x1c Signed-off-by: Eric Dumazet Fixes: 0c7aecd4bde4b7302 ("netns: add rtnl cmd to add and get peer netns ids") Cc: Nicolas Dichtel Reported-by: Cong Wang Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net/core') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2c49355d16c2..25b4b5d23485 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1300,7 +1300,6 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) s_h = cb->args[0]; s_idx = cb->args[1]; - rcu_read_lock(); cb->seq = net->dev_base_seq; /* A hack to preserve kernel<->userspace interface. @@ -1322,7 +1321,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &net->dev_index_head[h]; - hlist_for_each_entry_rcu(dev, head, index_hlist) { + hlist_for_each_entry(dev, head, index_hlist) { if (idx < s_idx) goto cont; err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, @@ -1344,7 +1343,6 @@ cont: } } out: - rcu_read_unlock(); cb->args[1] = idx; cb->args[0] = h; -- cgit v1.2.3 From 7768eed8bf1d2e5eefa38c573f15f737a9824052 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 10 Mar 2015 19:03:46 +0100 Subject: net: add comment for sock_efree() usage Signed-off-by: Oliver Hartkopp Acked-by: Alexander Duyck Signed-off-by: David S. Miller --- net/core/sock.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net/core') diff --git a/net/core/sock.c b/net/core/sock.c index 93c8b20c91e4..78e89eb7eb70 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1655,6 +1655,10 @@ void sock_rfree(struct sk_buff *skb) } EXPORT_SYMBOL(sock_rfree); +/* + * Buffer destructor for skbs that are not used directly in read or write + * path, e.g. for error handler skbs. Automatically called from kfree_skb. + */ void sock_efree(struct sk_buff *skb) { sock_put(skb->sk); -- cgit v1.2.3 From 4363890079674db7b00cf1bb0e6fa430e846e86b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 10 Mar 2015 21:58:32 -0400 Subject: net: Handle unregister properly when netdev namespace change fails. If rtnl_newlink() fails on it's call to dev_change_net_namespace(), we have to make use of the ->dellink() method, if present, just like we do when rtnl_configure_link() fails. Fixes: 317f4810e45e ("rtnl: allow to create device with IFLA_LINK_NETNSID set") Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'net/core') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 25b4b5d23485..ee0608bb3bc0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2166,28 +2166,28 @@ replay: } } err = rtnl_configure_link(dev, ifm); - if (err < 0) { - if (ops->newlink) { - LIST_HEAD(list_kill); - - ops->dellink(dev, &list_kill); - unregister_netdevice_many(&list_kill); - } else { - unregister_netdevice(dev); - } - goto out; - } - + if (err < 0) + goto out_unregister; if (link_net) { err = dev_change_net_namespace(dev, dest_net, ifname); if (err < 0) - unregister_netdevice(dev); + goto out_unregister; } out: if (link_net) put_net(link_net); put_net(dest_net); return err; +out_unregister: + if (ops->newlink) { + LIST_HEAD(list_kill); + + ops->dellink(dev, &list_kill); + unregister_netdevice_many(&list_kill); + } else { + unregister_netdevice(dev); + } + goto out; } } -- cgit v1.2.3 From b1cb59cf2efe7971d3d72a7b963d09a512d994c9 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Wed, 11 Mar 2015 14:29:17 +0300 Subject: net: sysctl_net_core: check SNDBUF and RCVBUF for min length sysctl has sysctl.net.core.rmem_*/wmem_* parameters which can be set to incorrect values. Given that 'struct sk_buff' allocates from rcvbuf, incorrectly set buffer length could result to memory allocation failures. For example, set them as follows: # sysctl net.core.rmem_default=64 net.core.wmem_default = 64 # sysctl net.core.wmem_default=64 net.core.wmem_default = 64 # ping localhost -s 1024 -i 0 > /dev/null This could result to the following failure: skbuff: skb_over_panic: text:ffffffff81628db4 len:-32 put:-32 head:ffff88003a1cc200 data:ffff88003a1cc200 tail:0xffffffe0 end:0xc0 dev: kernel BUG at net/core/skbuff.c:102! invalid opcode: 0000 [#1] SMP ... task: ffff88003b7f5550 ti: ffff88003ae88000 task.ti: ffff88003ae88000 RIP: 0010:[] [] skb_put+0xa1/0xb0 RSP: 0018:ffff88003ae8bc68 EFLAGS: 00010296 RAX: 000000000000008d RBX: 00000000ffffffe0 RCX: 0000000000000000 RDX: ffff88003fdcf598 RSI: ffff88003fdcd9c8 RDI: ffff88003fdcd9c8 RBP: ffff88003ae8bc88 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000001 R11: 00000000000002b2 R12: 0000000000000000 R13: 0000000000000000 R14: ffff88003d3f7300 R15: ffff88000012a900 FS: 00007fa0e2b4a840(0000) GS:ffff88003fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000d0f7e0 CR3: 000000003b8fb000 CR4: 00000000000006f0 Stack: ffff88003a1cc200 00000000ffffffe0 00000000000000c0 ffffffff818cab1d ffff88003ae8bd68 ffffffff81628db4 ffff88003ae8bd48 ffff88003b7f5550 ffff880031a09408 ffff88003b7f5550 ffff88000012aa48 ffff88000012ab00 Call Trace: [] unix_stream_sendmsg+0x2c4/0x470 [] sock_write_iter+0x146/0x160 [] new_sync_write+0x92/0xd0 [] vfs_write+0xd6/0x180 [] SyS_write+0x59/0xd0 [] system_call_fastpath+0x12/0x17 Code: 00 00 48 89 44 24 10 8b 87 c8 00 00 00 48 89 44 24 08 48 8b 87 d8 00 00 00 48 c7 c7 30 db 91 81 48 89 04 24 31 c0 e8 4f a8 0e 00 <0f> 0b eb fe 66 66 2e 0f 1f 84 00 00 00 00 00 55 48 89 e5 48 83 RIP [] skb_put+0xa1/0xb0 RSP Kernel panic - not syncing: Fatal exception Moreover, the possible minimum is 1, so we can get another kernel panic: ... BUG: unable to handle kernel paging request at ffff88013caee5c0 IP: [] __alloc_skb+0x12f/0x1f0 ... Signed-off-by: Alexey Kodanev Signed-off-by: David S. Miller --- net/core/sysctl_net_core.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net/core') diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 433424804284..8ce351ffceb1 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -25,6 +25,8 @@ static int zero = 0; static int one = 1; static int ushort_max = USHRT_MAX; +static int min_sndbuf = SOCK_MIN_SNDBUF; +static int min_rcvbuf = SOCK_MIN_RCVBUF; static int net_msg_warn; /* Unused, but still a sysctl */ @@ -237,7 +239,7 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, + .extra1 = &min_sndbuf, }, { .procname = "rmem_max", @@ -245,7 +247,7 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, + .extra1 = &min_rcvbuf, }, { .procname = "wmem_default", @@ -253,7 +255,7 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, + .extra1 = &min_sndbuf, }, { .procname = "rmem_default", @@ -261,7 +263,7 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, + .extra1 = &min_rcvbuf, }, { .procname = "dev_weight", -- cgit v1.2.3 From c29390c6dfeee0944ac6b5610ebbe403944378fc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Mar 2015 18:42:02 -0700 Subject: xps: must clear sender_cpu before forwarding John reported that my previous commit added a regression on his router. This is because sender_cpu & napi_id share a common location, so get_xps_queue() can see garbage and perform an out of bound access. We need to make sure sender_cpu is cleared before doing the transmit, otherwise any NIC busy poll enabled (skb_mark_napi_id()) can trigger this bug. Signed-off-by: Eric Dumazet Reported-by: John Bisected-by: John Fixes: 2bd82484bb4c ("xps: fix xps for stacked devices") Signed-off-by: David S. Miller --- include/linux/skbuff.h | 7 +++++++ net/core/skbuff.c | 2 +- net/ipv4/ip_forward.c | 1 + net/ipv6/ip6_output.c | 1 + 4 files changed, 10 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 30007afe70b3..f54d6659713a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -948,6 +948,13 @@ static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from) to->l4_hash = from->l4_hash; }; +static inline void skb_sender_cpu_clear(struct sk_buff *skb) +{ +#ifdef CONFIG_XPS + skb->sender_cpu = 0; +#endif +} + #ifdef NET_SKBUFF_DATA_USES_OFFSET static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f80507823531..434e78e5254d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4173,7 +4173,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) skb->ignore_df = 0; skb_dst_drop(skb); skb->mark = 0; - skb->sender_cpu = 0; + skb_sender_cpu_clear(skb); skb_init_secmark(skb); secpath_reset(skb); nf_reset(skb); diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 787b3c294ce6..d9bc28ac5d1b 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -67,6 +67,7 @@ static int ip_forward_finish(struct sk_buff *skb) if (unlikely(opt->optlen)) ip_forward_options(skb); + skb_sender_cpu_clear(skb); return dst_output(skb); } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 0a04a37305d5..7e80b61b51ff 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -318,6 +318,7 @@ static int ip6_forward_proxy_check(struct sk_buff *skb) static inline int ip6_forward_finish(struct sk_buff *skb) { + skb_sender_cpu_clear(skb); return dst_output(skb); } -- cgit v1.2.3 From 3a8dd9711e0792f64394edafadd66c2d1f1904df Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 11 Mar 2015 15:43:55 -0400 Subject: sock: fix possible NULL sk dereference in __skb_tstamp_tx Test that sk != NULL before reading sk->sk_tsflags. Fixes: 49ca0d8bfaf3 ("net-timestamp: no-payload option") Reported-by: One Thousand Gnomes Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/core/skbuff.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net/core') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 434e78e5254d..8e4ac97c8477 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3733,9 +3733,13 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, struct sock *sk, int tstype) { struct sk_buff *skb; - bool tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY; + bool tsonly; - if (!sk || !skb_may_tx_timestamp(sk, tsonly)) + if (!sk) + return; + + tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY; + if (!skb_may_tx_timestamp(sk, tsonly)) return; if (tsonly) -- cgit v1.2.3