From 47b70db5558388b3f4ecd10b492a0b3f6d680789 Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Fri, 19 Oct 2012 01:09:30 +0000 Subject: net:dev: remove double identical assignment in dev_change_net_namespace(). This patch removes double assignment of err to -EINVAL in dev_change_net_namespace(). Signed-off-by: Rami Rosen Acked-by: Serge E. Hallyn Signed-off-by: David S. Miller --- net/core/dev.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 09cb3f6dc40c..b4978e2d6ddf 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6264,7 +6264,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char goto out; /* Ensure the device has been registrered */ - err = -EINVAL; if (dev->reg_state != NETREG_REGISTERED) goto out; -- cgit v1.2.3 From 62532da9d5f47a7ced3b965aa73ffd5b1afbeb79 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 15 Nov 2012 08:49:10 +0000 Subject: net: Add generic packet offload infrastructure. Create a new data structure to contain the GRO/GSO callbacks and add a new registration mechanism. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/linux/netdevice.h | 14 +++++++++ net/core/dev.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+) (limited to 'net/core/dev.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7bf867c97043..d45a58db4ba3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1521,6 +1521,17 @@ struct packet_type { struct list_head list; }; +struct packet_offload { + __be16 type; /* This is really htons(ether_type). 
*/ + struct sk_buff *(*gso_segment)(struct sk_buff *skb, + netdev_features_t features); + int (*gso_send_check)(struct sk_buff *skb); + struct sk_buff **(*gro_receive)(struct sk_buff **head, + struct sk_buff *skb); + int (*gro_complete)(struct sk_buff *skb); + struct list_head list; +}; + #include /* netdevice notifier chain. Please remember to update the rtnetlink @@ -1615,6 +1626,9 @@ extern struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short extern void dev_add_pack(struct packet_type *pt); extern void dev_remove_pack(struct packet_type *pt); extern void __dev_remove_pack(struct packet_type *pt); +extern void dev_add_offload(struct packet_offload *po); +extern void dev_remove_offload(struct packet_offload *po); +extern void __dev_remove_offload(struct packet_offload *po); extern struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short flags, unsigned short mask); diff --git a/net/core/dev.c b/net/core/dev.c index 83232a1be1e7..6884f8783bdd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -176,8 +176,10 @@ #define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1) static DEFINE_SPINLOCK(ptype_lock); +static DEFINE_SPINLOCK(offload_lock); static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; static struct list_head ptype_all __read_mostly; /* Taps */ +static struct list_head offload_base __read_mostly; /* * The @dev_base_head list is protected by @dev_base_lock and the rtnl @@ -470,6 +472,82 @@ void dev_remove_pack(struct packet_type *pt) } EXPORT_SYMBOL(dev_remove_pack); + +/** + * dev_add_offload - register offload handlers + * @po: protocol offload declaration + * + * Add protocol offload handlers to the networking stack. The passed + * &proto_offload is linked into kernel lists and may not be freed until + * it has been removed from the kernel lists. 
+ * + * This call does not sleep therefore it can not + * guarantee all CPU's that are in middle of receiving packets + * will see the new offload handlers (until the next received packet). + */ +void dev_add_offload(struct packet_offload *po) +{ + struct list_head *head = &offload_base; + + spin_lock(&offload_lock); + list_add_rcu(&po->list, head); + spin_unlock(&offload_lock); +} +EXPORT_SYMBOL(dev_add_offload); + +/** + * __dev_remove_offload - remove offload handler + * @po: packet offload declaration + * + * Remove a protocol offload handler that was previously added to the + * kernel offload handlers by dev_add_offload(). The passed &offload_type + * is removed from the kernel lists and can be freed or reused once this + * function returns. + * + * The packet type might still be in use by receivers + * and must not be freed until after all the CPU's have gone + * through a quiescent state. + */ +void __dev_remove_offload(struct packet_offload *po) +{ + struct list_head *head = &offload_base; + struct packet_offload *po1; + + spin_lock(&ptype_lock); + + list_for_each_entry(po1, head, list) { + if (po == po1) { + list_del_rcu(&po->list); + goto out; + } + } + + pr_warn("dev_remove_offload: %p not found\n", po); +out: + spin_unlock(&ptype_lock); +} +EXPORT_SYMBOL(__dev_remove_offload); + +/** + * dev_remove_offload - remove packet offload handler + * @po: packet offload declaration + * + * Remove a packet offload handler that was previously added to the kernel + * offload handlers by dev_add_offload(). The passed &offload_type is + * removed from the kernel lists and can be freed or reused once this + * function returns. + * + * This call sleeps to guarantee that no CPU is looking at the packet + * type after return. 
+ */ +void dev_remove_offload(struct packet_offload *po) +{ + __dev_remove_offload(po); + + synchronize_net(); +} +EXPORT_SYMBOL(dev_remove_offload); + /****************************************************************************** Device Boot-time Settings Routines @@ -6661,6 +6739,8 @@ static int __init net_dev_init(void) for (i = 0; i < PTYPE_HASH_SIZE; i++) INIT_LIST_HEAD(&ptype_base[i]); + INIT_LIST_HEAD(&offload_base); + if (register_pernet_subsys(&netdev_net_ops)) goto out; -- cgit v1.2.3 From 22061d8014455b01eb018bd6c35a1b3040ccc230 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 15 Nov 2012 08:49:11 +0000 Subject: net: Switch to using the new packet offload infrastructure Convert to using the new GSO/GRO registration mechanism and new packet offload structure. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ------ net/core/dev.c | 19 +++++++++---------- net/ipv4/af_inet.c | 5 +++++ net/ipv6/af_inet6.c | 6 ++++++ 4 files changed, 20 insertions(+), 16 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d45a58db4ba3..61bc8483031f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1509,12 +1509,6 @@ struct packet_type { struct net_device *, struct packet_type *, struct net_device *); - struct sk_buff *(*gso_segment)(struct sk_buff *skb, - netdev_features_t features); - int (*gso_send_check)(struct sk_buff *skb); - struct sk_buff **(*gro_receive)(struct sk_buff **head, - struct sk_buff *skb); - int (*gro_complete)(struct sk_buff *skb); bool (*id_match)(struct packet_type *ptype, struct sock *sk); void *af_packet_priv; diff --git a/net/core/dev.c b/net/core/dev.c index 6884f8783bdd..cf843a256cc6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2072,7 +2072,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); - struct packet_type 
*ptype; + struct packet_offload *ptype; __be16 type = skb->protocol; int vlan_depth = ETH_HLEN; int err; @@ -2101,9 +2101,8 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, } rcu_read_lock(); - list_for_each_entry_rcu(ptype, - &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { - if (ptype->type == type && !ptype->dev && ptype->gso_segment) { + list_for_each_entry_rcu(ptype, &offload_base, list) { + if (ptype->type == type && ptype->gso_segment) { if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { err = ptype->gso_send_check(skb); segs = ERR_PTR(err); @@ -3522,9 +3521,9 @@ static void flush_backlog(void *arg) static int napi_gro_complete(struct sk_buff *skb) { - struct packet_type *ptype; + struct packet_offload *ptype; __be16 type = skb->protocol; - struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; + struct list_head *head = &offload_base; int err = -ENOENT; if (NAPI_GRO_CB(skb)->count == 1) { @@ -3534,7 +3533,7 @@ static int napi_gro_complete(struct sk_buff *skb) rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { - if (ptype->type != type || ptype->dev || !ptype->gro_complete) + if (ptype->type != type || !ptype->gro_complete) continue; err = ptype->gro_complete(skb); @@ -3584,9 +3583,9 @@ EXPORT_SYMBOL(napi_gro_flush); enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff **pp = NULL; - struct packet_type *ptype; + struct packet_offload *ptype; __be16 type = skb->protocol; - struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; + struct list_head *head = &offload_base; int same_flow; int mac_len; enum gro_result ret; @@ -3599,7 +3598,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { - if (ptype->type != type || ptype->dev || !ptype->gro_receive) + if (ptype->type != type || !ptype->gro_receive) continue; skb_set_network_header(skb, skb_gro_offset(skb)); diff --git 
a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 766c59658563..4c99c5fdba3f 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1662,6 +1662,10 @@ static int ipv4_proc_init(void); static struct packet_type ip_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_IP), .func = ip_rcv, +}; + +static struct packet_offload ip_packet_offload __read_mostly = { + .type = cpu_to_be16(ETH_P_IP), .gso_send_check = inet_gso_send_check, .gso_segment = inet_gso_segment, .gro_receive = inet_gro_receive, @@ -1781,6 +1785,7 @@ static int __init inet_init(void) ipfrag_init(); + dev_add_offload(&ip_packet_offload); dev_add_pack(&ip_packet_type); rc = 0; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index a974247a9ae4..6e245177608c 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -938,6 +938,10 @@ out_unlock: static struct packet_type ipv6_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_IPV6), .func = ipv6_rcv, +}; + +static struct packet_offload ipv6_packet_offload __read_mostly = { + .type = cpu_to_be16(ETH_P_IPV6), .gso_send_check = ipv6_gso_send_check, .gso_segment = ipv6_gso_segment, .gro_receive = ipv6_gro_receive, @@ -946,6 +950,7 @@ static struct packet_type ipv6_packet_type __read_mostly = { static int __init ipv6_packet_init(void) { + dev_add_offload(&ipv6_packet_offload); dev_add_pack(&ipv6_packet_type); return 0; } @@ -953,6 +958,7 @@ static int __init ipv6_packet_init(void) static void ipv6_packet_cleanup(void) { dev_remove_pack(&ipv6_packet_type); + dev_remove_offload(&ipv6_packet_offload); } static int __net_init ipv6_init_mibs(struct net *net) -- cgit v1.2.3 From f191a1d17f227032c159e5499809f545402b6dc6 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 15 Nov 2012 08:49:23 +0000 Subject: net: Remove code duplication between offload structures Move the offload callbacks into its own structure. Signed-off-by: Vlad Yasevich Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 10 +++++++--- include/net/protocol.h | 10 +++------- net/core/dev.c | 14 +++++++------- net/ipv4/af_inet.c | 42 ++++++++++++++++++++++++------------------ net/ipv6/ip6_offload.c | 28 +++++++++++++++------------- net/ipv6/tcpv6_offload.c | 10 ++++++---- net/ipv6/udp_offload.c | 6 ++++-- 7 files changed, 66 insertions(+), 54 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 61bc8483031f..e46c830c88d8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1515,15 +1515,19 @@ struct packet_type { struct list_head list; }; -struct packet_offload { - __be16 type; /* This is really htons(ether_type). */ +struct offload_callbacks { struct sk_buff *(*gso_segment)(struct sk_buff *skb, netdev_features_t features); int (*gso_send_check)(struct sk_buff *skb); struct sk_buff **(*gro_receive)(struct sk_buff **head, struct sk_buff *skb); int (*gro_complete)(struct sk_buff *skb); - struct list_head list; +}; + +struct packet_offload { + __be16 type; /* This is really htons(ether_type). */ + struct offload_callbacks callbacks; + struct list_head list; }; #include diff --git a/include/net/protocol.h b/include/net/protocol.h index 2c90794c139d..047c0476c0a0 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -29,6 +29,7 @@ #if IS_ENABLED(CONFIG_IPV6) #include #endif +#include /* This is one larger than the largest protocol value that can be * found in an ipv4 or ipv6 header. 
Since in both cases the protocol @@ -63,13 +64,8 @@ struct inet6_protocol { #endif struct net_offload { - int (*gso_send_check)(struct sk_buff *skb); - struct sk_buff *(*gso_segment)(struct sk_buff *skb, - netdev_features_t features); - struct sk_buff **(*gro_receive)(struct sk_buff **head, - struct sk_buff *skb); - int (*gro_complete)(struct sk_buff *skb); - unsigned int flags; /* Flags used by IPv6 for now */ + struct offload_callbacks callbacks; + unsigned int flags; /* Flags used by IPv6 for now */ }; /* This should be set for any extension header which is compatible with GSO. */ #define INET6_PROTO_GSO_EXTHDR 0x1 diff --git a/net/core/dev.c b/net/core/dev.c index cf843a256cc6..cf105e886cca 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2102,16 +2102,16 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, rcu_read_lock(); list_for_each_entry_rcu(ptype, &offload_base, list) { - if (ptype->type == type && ptype->gso_segment) { + if (ptype->type == type && ptype->callbacks.gso_segment) { if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { - err = ptype->gso_send_check(skb); + err = ptype->callbacks.gso_send_check(skb); segs = ERR_PTR(err); if (err || skb_gso_ok(skb, features)) break; __skb_push(skb, (skb->data - skb_network_header(skb))); } - segs = ptype->gso_segment(skb, features); + segs = ptype->callbacks.gso_segment(skb, features); break; } } @@ -3533,10 +3533,10 @@ static int napi_gro_complete(struct sk_buff *skb) rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { - if (ptype->type != type || !ptype->gro_complete) + if (ptype->type != type || !ptype->callbacks.gro_complete) continue; - err = ptype->gro_complete(skb); + err = ptype->callbacks.gro_complete(skb); break; } rcu_read_unlock(); @@ -3598,7 +3598,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { - if (ptype->type != type || !ptype->gro_receive) + if (ptype->type != type || 
!ptype->callbacks.gro_receive) continue; skb_set_network_header(skb, skb_gro_offset(skb)); @@ -3608,7 +3608,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; - pp = ptype->gro_receive(&napi->gro_list, skb); + pp = ptype->callbacks.gro_receive(&napi->gro_list, skb); break; } rcu_read_unlock(); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 9f2e7fd8bea8..d5e5a054123c 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1276,8 +1276,8 @@ static int inet_gso_send_check(struct sk_buff *skb) rcu_read_lock(); ops = rcu_dereference(inet_offloads[proto]); - if (likely(ops && ops->gso_send_check)) - err = ops->gso_send_check(skb); + if (likely(ops && ops->callbacks.gso_send_check)) + err = ops->callbacks.gso_send_check(skb); rcu_read_unlock(); out: @@ -1326,8 +1326,8 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, rcu_read_lock(); ops = rcu_dereference(inet_offloads[proto]); - if (likely(ops && ops->gso_segment)) - segs = ops->gso_segment(skb, features); + if (likely(ops && ops->callbacks.gso_segment)) + segs = ops->callbacks.gso_segment(skb, features); rcu_read_unlock(); if (!segs || IS_ERR(segs)) @@ -1379,7 +1379,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, rcu_read_lock(); ops = rcu_dereference(inet_offloads[proto]); - if (!ops || !ops->gro_receive) + if (!ops || !ops->callbacks.gro_receive) goto out_unlock; if (*(u8 *)iph != 0x45) @@ -1420,7 +1420,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, skb_gro_pull(skb, sizeof(*iph)); skb_set_transport_header(skb, skb_gro_offset(skb)); - pp = ops->gro_receive(head, skb); + pp = ops->callbacks.gro_receive(head, skb); out_unlock: rcu_read_unlock(); @@ -1444,10 +1444,10 @@ static int inet_gro_complete(struct sk_buff *skb) rcu_read_lock(); ops = rcu_dereference(inet_offloads[proto]); - if (WARN_ON(!ops || !ops->gro_complete)) + if (WARN_ON(!ops || 
!ops->callbacks.gro_complete)) goto out_unlock; - err = ops->gro_complete(skb); + err = ops->callbacks.gro_complete(skb); out_unlock: rcu_read_unlock(); @@ -1563,10 +1563,12 @@ static const struct net_protocol tcp_protocol = { }; static const struct net_offload tcp_offload = { - .gso_send_check = tcp_v4_gso_send_check, - .gso_segment = tcp_tso_segment, - .gro_receive = tcp4_gro_receive, - .gro_complete = tcp4_gro_complete, + .callbacks = { + .gso_send_check = tcp_v4_gso_send_check, + .gso_segment = tcp_tso_segment, + .gro_receive = tcp4_gro_receive, + .gro_complete = tcp4_gro_complete, + }, }; static const struct net_protocol udp_protocol = { @@ -1577,8 +1579,10 @@ static const struct net_protocol udp_protocol = { }; static const struct net_offload udp_offload = { - .gso_send_check = udp4_ufo_send_check, - .gso_segment = udp4_ufo_fragment, + .callbacks = { + .gso_send_check = udp4_ufo_send_check, + .gso_segment = udp4_ufo_fragment, + }, }; static const struct net_protocol icmp_protocol = { @@ -1667,10 +1671,12 @@ static int ipv4_proc_init(void); static struct packet_offload ip_packet_offload __read_mostly = { .type = cpu_to_be16(ETH_P_IP), - .gso_send_check = inet_gso_send_check, - .gso_segment = inet_gso_segment, - .gro_receive = inet_gro_receive, - .gro_complete = inet_gro_complete, + .callbacks = { + .gso_send_check = inet_gso_send_check, + .gso_segment = inet_gso_segment, + .gro_receive = inet_gro_receive, + .gro_complete = inet_gro_complete, + }, }; static int __init ipv4_offload_init(void) diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 63d79d9005bd..f26f0da7f095 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -70,9 +70,9 @@ static int ipv6_gso_send_check(struct sk_buff *skb) ops = rcu_dereference(inet6_offloads[ ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr)]); - if (likely(ops && ops->gso_send_check)) { + if (likely(ops && ops->callbacks.gso_send_check)) { skb_reset_transport_header(skb); - err = ops->gso_send_check(skb); 
+ err = ops->callbacks.gso_send_check(skb); } rcu_read_unlock(); @@ -113,9 +113,9 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); rcu_read_lock(); ops = rcu_dereference(inet6_offloads[proto]); - if (likely(ops && ops->gso_segment)) { + if (likely(ops && ops->callbacks.gso_segment)) { skb_reset_transport_header(skb); - segs = ops->gso_segment(skb, features); + segs = ops->callbacks.gso_segment(skb, features); } rcu_read_unlock(); @@ -173,7 +173,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, rcu_read_lock(); proto = iph->nexthdr; ops = rcu_dereference(inet6_offloads[proto]); - if (!ops || !ops->gro_receive) { + if (!ops || !ops->callbacks.gro_receive) { __pskb_pull(skb, skb_gro_offset(skb)); proto = ipv6_gso_pull_exthdrs(skb, proto); skb_gro_pull(skb, -skb_transport_offset(skb)); @@ -181,7 +181,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, __skb_push(skb, skb_gro_offset(skb)); ops = rcu_dereference(inet6_offloads[proto]); - if (!ops || !ops->gro_receive) + if (!ops || !ops->callbacks.gro_receive) goto out_unlock; iph = ipv6_hdr(skb); @@ -220,7 +220,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, csum = skb->csum; skb_postpull_rcsum(skb, iph, skb_network_header_len(skb)); - pp = ops->gro_receive(head, skb); + pp = ops->callbacks.gro_receive(head, skb); skb->csum = csum; @@ -244,10 +244,10 @@ static int ipv6_gro_complete(struct sk_buff *skb) rcu_read_lock(); ops = rcu_dereference(inet6_offloads[NAPI_GRO_CB(skb)->proto]); - if (WARN_ON(!ops || !ops->gro_complete)) + if (WARN_ON(!ops || !ops->callbacks.gro_complete)) goto out_unlock; - err = ops->gro_complete(skb); + err = ops->callbacks.gro_complete(skb); out_unlock: rcu_read_unlock(); @@ -257,10 +257,12 @@ out_unlock: static struct packet_offload ipv6_packet_offload __read_mostly = { .type = cpu_to_be16(ETH_P_IPV6), - .gso_send_check = ipv6_gso_send_check, - .gso_segment = 
ipv6_gso_segment, - .gro_receive = ipv6_gro_receive, - .gro_complete = ipv6_gro_complete, + .callbacks = { + .gso_send_check = ipv6_gso_send_check, + .gso_segment = ipv6_gso_segment, + .gro_receive = ipv6_gro_receive, + .gro_complete = ipv6_gro_complete, + }, }; static int __init ipv6_offload_init(void) diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index 3a27fe685c8e..2ec6bf6a0aa0 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -81,10 +81,12 @@ static int tcp6_gro_complete(struct sk_buff *skb) } static const struct net_offload tcpv6_offload = { - .gso_send_check = tcp_v6_gso_send_check, - .gso_segment = tcp_tso_segment, - .gro_receive = tcp6_gro_receive, - .gro_complete = tcp6_gro_complete, + .callbacks = { + .gso_send_check = tcp_v6_gso_send_check, + .gso_segment = tcp_tso_segment, + .gro_receive = tcp6_gro_receive, + .gro_complete = tcp6_gro_complete, + }, }; int __init tcpv6_offload_init(void) diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 979e4ab63a8b..8e01c44a987c 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -107,8 +107,10 @@ out: return segs; } static const struct net_offload udpv6_offload = { - .gso_send_check = udp6_ufo_send_check, - .gso_segment = udp6_ufo_fragment, + .callbacks = { + .gso_send_check = udp6_ufo_send_check, + .gso_segment = udp6_ufo_fragment, + }, }; int __init udp_offload_init(void) -- cgit v1.2.3 From c53aa5058ad5ca8876a47d6639ad4d4f2c5ed584 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 16 Nov 2012 08:08:23 +0000 Subject: net: use right lock in __dev_remove_offload offload_base is protected by offload_lock, not ptype_lock Signed-off-by: Eric Dumazet Cc: Vlad Yasevich Acked-by: Vlad Yasevich Signed-off-by: David S. 
Miller --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index cf105e886cca..2705a2ab89af 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -513,7 +513,7 @@ void __dev_remove_offload(struct packet_offload *po) struct list_head *head = &offload_base; struct packet_offload *po1; - spin_lock(&ptype_lock); + spin_lock(&offload_lock); list_for_each_entry(po1, head, list) { if (po == po1) { @@ -524,7 +524,7 @@ void __dev_remove_offload(struct packet_offload *po) pr_warn("dev_remove_offload: %p not found\n", po); out: - spin_unlock(&ptype_lock); + spin_unlock(&offload_lock); } EXPORT_SYMBOL(__dev_remove_offload); -- cgit v1.2.3 From 5e1fccc0bfac4946932b36e4535c03957d35113d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 16 Nov 2012 03:03:04 +0000 Subject: net: Allow userns root control of the core of the network stack. Allow an unprivileged user who has created a user namespace, and then created a network namespace to effectively use the new network namespace, by reducing capable(CAP_NET_ADMIN) and capable(CAP_NET_RAW) calls to be ns_capable(net->user_ns, CAP_NET_ADMIN), or ns_capable(net->user_ns, CAP_NET_RAW) calls. Settings that merely control a single network device are allowed. Either the network device is a logical network device where restrictions make no difference or the network device is a hardware NIC that has been explicitly moved from the initial network namespace. In general policy and network stack state changes are allowed while resource control is left unchanged. Allow ethtool ioctls. Allow binding to network devices. Allow setting the socket mark. Allow setting the socket priority. Allow setting the network device alias via sysfs. Allow setting the mtu via sysfs. Allow changing the network device flags via sysfs. Allow setting the network device group via sysfs. Allow the following network device ioctls. 
SIOCGMIIPHY SIOCGMIIREG SIOCSIFNAME SIOCSIFFLAGS SIOCSIFMETRIC SIOCSIFMTU SIOCSIFHWADDR SIOCSIFSLAVE SIOCADDMULTI SIOCDELMULTI SIOCSIFHWBROADCAST SIOCSMIIREG SIOCBONDENSLAVE SIOCBONDRELEASE SIOCBONDSETHWADDR SIOCBONDCHANGEACTIVE SIOCBRADDIF SIOCBRDELIF SIOCSHWTSTAMP Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/core/dev.c | 17 +++++++++++++---- net/core/ethtool.c | 2 +- net/core/net-sysfs.c | 15 ++++++++++----- net/core/sock.c | 7 ++++--- 4 files changed, 28 insertions(+), 13 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 974199daa911..0afae8ba413e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5279,7 +5279,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSIFNAME: - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; dev_load(net, ifr.ifr_name); rtnl_lock(); @@ -5300,16 +5300,25 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) * - require strict serialization. * - do not return a value */ + case SIOCSIFMAP: + case SIOCSIFTXQLEN: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + /* fall through */ + /* + * These ioctl calls: + * - require local superuser power. + * - require strict serialization. 
+ * - do not return a value + */ case SIOCSIFFLAGS: case SIOCSIFMETRIC: case SIOCSIFMTU: - case SIOCSIFMAP: case SIOCSIFHWADDR: case SIOCSIFSLAVE: case SIOCADDMULTI: case SIOCDELMULTI: case SIOCSIFHWBROADCAST: - case SIOCSIFTXQLEN: case SIOCSMIIREG: case SIOCBONDENSLAVE: case SIOCBONDRELEASE: @@ -5318,7 +5327,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) case SIOCBRADDIF: case SIOCBRDELIF: case SIOCSHWTSTAMP: - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; /* fall through */ case SIOCBONDSLAVEINFOQUERY: diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 4d64cc2e3fa9..a8705432e4b1 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1460,7 +1460,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GEEE: break; default: - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index bcf02f608cbf..c66b8c2f3b22 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -73,11 +73,12 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len, int (*set)(struct net_device *, unsigned long)) { - struct net_device *net = to_net_dev(dev); + struct net_device *netdev = to_net_dev(dev); + struct net *net = dev_net(netdev); unsigned long new; int ret = -EINVAL; - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; ret = kstrtoul(buf, 0, &new); @@ -87,8 +88,8 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, if (!rtnl_trylock()) return restart_syscall(); - if (dev_isalive(net)) { - if ((ret = (*set)(net, new)) == 0) + if (dev_isalive(netdev)) { + if ((ret = (*set)(netdev, new)) == 0) ret = len; } rtnl_unlock(); @@ -264,6 +265,9 @@ static ssize_t store_tx_queue_len(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { + if 
(!capable(CAP_NET_ADMIN)) + return -EPERM; + return netdev_store(dev, attr, buf, len, change_tx_queue_len); } @@ -271,10 +275,11 @@ static ssize_t store_ifalias(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { struct net_device *netdev = to_net_dev(dev); + struct net *net = dev_net(netdev); size_t count = len; ssize_t ret; - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; /* ignore trailing newline */ diff --git a/net/core/sock.c b/net/core/sock.c index 06286006a2cc..d4f7b58b3866 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -515,7 +515,7 @@ static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen) /* Sorry... */ ret = -EPERM; - if (!capable(CAP_NET_RAW)) + if (!ns_capable(net->user_ns, CAP_NET_RAW)) goto out; ret = -EINVAL; @@ -696,7 +696,8 @@ set_rcvbuf: break; case SO_PRIORITY: - if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN)) + if ((val >= 0 && val <= 6) || + ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) sk->sk_priority = val; else ret = -EPERM; @@ -813,7 +814,7 @@ set_rcvbuf: clear_bit(SOCK_PASSSEC, &sock->flags); break; case SO_MARK: - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) ret = -EPERM; else sk->sk_mark = val; -- cgit v1.2.3 From 388dfc2d2d9c43c251921a397d6fe5ef7dc34731 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Tue, 20 Nov 2012 00:57:04 +0000 Subject: net: Remove redundant null check before kfree in dev.c kfree on a null pointer is a no-op. Signed-off-by: Sachin Kamat Signed-off-by: David S. 
Miller --- net/core/dev.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 0afae8ba413e..7304ea8a1f13 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1153,10 +1153,8 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) return -EINVAL; if (!len) { - if (dev->ifalias) { - kfree(dev->ifalias); - dev->ifalias = NULL; - } + kfree(dev->ifalias); + dev->ifalias = NULL; return 0; } -- cgit v1.2.3 From c91f6df2db4972d3cc983e6988b9abf1ad02f5f9 Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Mon, 26 Nov 2012 05:21:08 +0000 Subject: sockopt: Change getsockopt() of SO_BINDTODEVICE to return an interface name Instead of having the getsockopt() of SO_BINDTODEVICE return an index, which will then require another call like if_indextoname() to get the actual interface name, have it return the name directly. This also matches the existing man page description on socket(7) which mentions the argument being an interface name. If the value has not been set, zero is returned and optlen will be set to zero to indicate there is no interface name present. Added a seqlock to protect this code path, and dev_ifname(), from someone changing the device name via dev_change_name(). v2: Added seqlock protection while copying device name. v3: Fixed word wrap in patch. Signed-off-by: Brian Haley Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 2 ++ net/core/dev.c | 21 ++++++++++++++-- net/core/sock.c | 64 ++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 81 insertions(+), 6 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e46c830c88d8..e9929abeb932 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1567,6 +1567,8 @@ extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev); extern rwlock_t dev_base_lock; /* Device list lock */ +extern seqlock_t devnet_rename_seq; /* Device rename lock */ + #define for_each_netdev(net, d) \ list_for_each_entry(d, &(net)->dev_base_head, dev_list) diff --git a/net/core/dev.c b/net/core/dev.c index 7304ea8a1f13..2a5f55866429 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -203,6 +203,8 @@ static struct list_head offload_base __read_mostly; DEFINE_RWLOCK(dev_base_lock); EXPORT_SYMBOL(dev_base_lock); +DEFINE_SEQLOCK(devnet_rename_seq); + static inline void dev_base_seq_inc(struct net *net) { while (++net->dev_base_seq == 0); @@ -1091,22 +1093,31 @@ int dev_change_name(struct net_device *dev, const char *newname) if (dev->flags & IFF_UP) return -EBUSY; - if (strncmp(newname, dev->name, IFNAMSIZ) == 0) + write_seqlock(&devnet_rename_seq); + + if (strncmp(newname, dev->name, IFNAMSIZ) == 0) { + write_sequnlock(&devnet_rename_seq); return 0; + } memcpy(oldname, dev->name, IFNAMSIZ); err = dev_get_valid_name(net, dev, newname); - if (err < 0) + if (err < 0) { + write_sequnlock(&devnet_rename_seq); return err; + } rollback: ret = device_rename(&dev->dev, dev->name); if (ret) { memcpy(dev->name, oldname, IFNAMSIZ); + write_sequnlock(&devnet_rename_seq); return ret; } + write_sequnlock(&devnet_rename_seq); + write_lock_bh(&dev_base_lock); hlist_del_rcu(&dev->name_hlist); write_unlock_bh(&dev_base_lock); @@ -1124,6 +1135,7 @@ rollback: /* err >= 0 after dev_alloc_name() or stores the first errno */ if (err >= 0) { err 
= ret; + write_seqlock(&devnet_rename_seq); memcpy(dev->name, oldname, IFNAMSIZ); goto rollback; } else { @@ -4148,6 +4160,7 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg) { struct net_device *dev; struct ifreq ifr; + unsigned seq; /* * Fetch the caller's info block. @@ -4156,6 +4169,8 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg) if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) return -EFAULT; +retry: + seq = read_seqbegin(&devnet_rename_seq); rcu_read_lock(); dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex); if (!dev) { @@ -4165,6 +4180,8 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg) strcpy(ifr.ifr_name, dev->name); rcu_read_unlock(); + if (read_seqretry(&devnet_rename_seq, seq)) + goto retry; if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) return -EFAULT; diff --git a/net/core/sock.c b/net/core/sock.c index d4f7b58b3866..a692ef49c9bb 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -505,7 +505,8 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie) } EXPORT_SYMBOL(sk_dst_check); -static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen) +static int sock_setbindtodevice(struct sock *sk, char __user *optval, + int optlen) { int ret = -ENOPROTOOPT; #ifdef CONFIG_NETDEVICES @@ -562,6 +563,59 @@ out: return ret; } +static int sock_getbindtodevice(struct sock *sk, char __user *optval, + int __user *optlen, int len) +{ + int ret = -ENOPROTOOPT; +#ifdef CONFIG_NETDEVICES + struct net *net = sock_net(sk); + struct net_device *dev; + char devname[IFNAMSIZ]; + unsigned seq; + + if (sk->sk_bound_dev_if == 0) { + len = 0; + goto zero; + } + + ret = -EINVAL; + if (len < IFNAMSIZ) + goto out; + +retry: + seq = read_seqbegin(&devnet_rename_seq); + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); + ret = -ENODEV; + if (!dev) { + rcu_read_unlock(); + goto out; + } + + strcpy(devname, dev->name); + rcu_read_unlock(); + if 
(read_seqretry(&devnet_rename_seq, seq)) + goto retry; + + len = strlen(devname) + 1; + + ret = -EFAULT; + if (copy_to_user(optval, devname, len)) + goto out; + +zero: + ret = -EFAULT; + if (put_user(len, optlen)) + goto out; + + ret = 0; + +out: +#endif + + return ret; +} + static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool) { if (valbool) @@ -589,7 +643,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname, */ if (optname == SO_BINDTODEVICE) - return sock_bindtodevice(sk, optval, optlen); + return sock_setbindtodevice(sk, optval, optlen); if (optlen < sizeof(int)) return -EINVAL; @@ -1075,15 +1129,17 @@ int sock_getsockopt(struct socket *sock, int level, int optname, case SO_NOFCS: v.val = sock_flag(sk, SOCK_NOFCS); break; + case SO_BINDTODEVICE: - v.val = sk->sk_bound_dev_if; - break; + return sock_getbindtodevice(sk, optval, optlen, len); + case SO_GET_FILTER: len = sk_get_filter(sk, (struct sock_filter __user *)optval, len); if (len < 0) return len; goto lenout; + default: return -ENOPROTOOPT; } -- cgit v1.2.3 From bb728820fe7c42fdb838ab2745fb5fe6b18b5ffa Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Wed, 28 Nov 2012 21:55:25 +0000 Subject: core: make GRO methods static. This patch changes three methods to be static and removes their EXPORT_SYMBOLs in core/dev.c and their external declaration in netdevice.h. The methods, dev_gro_receive(), napi_frags_finish() and napi_skb_finish(), which are in the GRO rx path, are not used outside core/dev.c. Signed-off-by: Rami Rosen Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 6 ------ net/core/dev.c | 9 +++------ 2 files changed, 3 insertions(+), 12 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e9929abeb932..18c5dc98f6dc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2153,16 +2153,10 @@ extern void dev_kfree_skb_any(struct sk_buff *skb); extern int netif_rx(struct sk_buff *skb); extern int netif_rx_ni(struct sk_buff *skb); extern int netif_receive_skb(struct sk_buff *skb); -extern gro_result_t dev_gro_receive(struct napi_struct *napi, - struct sk_buff *skb); -extern gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb); extern gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); extern void napi_gro_flush(struct napi_struct *napi, bool flush_old); extern struct sk_buff * napi_get_frags(struct napi_struct *napi); -extern gro_result_t napi_frags_finish(struct napi_struct *napi, - struct sk_buff *skb, - gro_result_t ret); extern gro_result_t napi_gro_frags(struct napi_struct *napi); static inline void napi_free_frags(struct napi_struct *napi) diff --git a/net/core/dev.c b/net/core/dev.c index 2a5f55866429..2f94df257e5a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3592,7 +3592,7 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old) } EXPORT_SYMBOL(napi_gro_flush); -enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff **pp = NULL; struct packet_offload *ptype; @@ -3683,7 +3683,6 @@ normal: ret = GRO_NORMAL; goto pull; } -EXPORT_SYMBOL(dev_gro_receive); static inline gro_result_t __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) @@ -3710,7 +3709,7 @@ __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) return dev_gro_receive(napi, skb); } -gro_result_t napi_skb_finish(gro_result_t ret, struct 
sk_buff *skb) +static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) { switch (ret) { case GRO_NORMAL: @@ -3736,7 +3735,6 @@ gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) return ret; } -EXPORT_SYMBOL(napi_skb_finish); static void skb_gro_reset_offset(struct sk_buff *skb) { @@ -3788,7 +3786,7 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi) } EXPORT_SYMBOL(napi_get_frags); -gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, +static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, gro_result_t ret) { switch (ret) { @@ -3813,7 +3811,6 @@ gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, return ret; } -EXPORT_SYMBOL(napi_frags_finish); static struct sk_buff *napi_frags_skb(struct napi_struct *napi) { -- cgit v1.2.3 From 4e66ae2ea371cf431283e2cb95480eb860432856 Mon Sep 17 00:00:00 2001 From: Serge Hallyn Date: Mon, 3 Dec 2012 16:17:12 +0000 Subject: net: dev_change_net_namespace: send a KOBJ_REMOVED/KOBJ_ADD When a new nic is created in namespace ns1, the kernel sends a KOBJ_ADD uevent to ns1. When the nic is moved to ns2, we only send a KOBJ_MOVE to ns2, and nothing to ns1. This patch changes that behavior so that when moving a nic from ns1 to ns2, we send a KOBJ_REMOVED to ns1 and KOBJ_ADD to ns2. (The KOBJ_MOVE is still sent to ns2). The effects of this can be seen when starting and stopping containers in an upstart based host. Lxc will create a pair of veth nics, the kernel sends KOBJ_ADD, and upstart starts network-instance jobs for each. When one nic is moved to the container, because no KOBJ_REMOVED event is received, the network-instance job for that veth never goes away. This was reported at https://bugs.launchpad.net/ubuntu/+source/lxc/+bug/1065589 With this patch the network-instance jobs properly go away. 
The other oddness solved here is that if a nic is passed into a running upstart-based container, without this patch no network-instance job is started in the container. But when the container creates a new nic itself (ip link add new type veth) then network-interface jobs are created. With this patch, behavior comes in line with a regular host. v2: also send KOBJ_ADD to new netns. There will then be a _MOVE event from the device_rename() call, but that should be innocuous. Signed-off-by: Serge Hallyn Acked-by: "Eric W. Biederman" Acked-by: Daniel Lezcano Signed-off-by: David S. Miller --- net/core/dev.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 2f94df257e5a..0aea3fee7f6d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6418,6 +6418,9 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char dev_uc_flush(dev); dev_mc_flush(dev); + /* Send a netdev-removed uevent to the old namespace */ + kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE); + /* Actually switch the network namespace */ dev_net_set(dev, net); @@ -6429,6 +6432,9 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char dev->iflink = dev->ifindex; } + /* Send a netdev-add uevent to the new namespace */ + kobject_uevent(&dev->dev.kobj, KOBJ_ADD); + /* Fixup kobjects */ err = device_rename(&dev->dev, dev->name); WARN_ON(err); -- cgit v1.2.3 From e3d8fabee3b66ce158b2603f270479b84b6e4ba7 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 3 Dec 2012 01:16:32 +0000 Subject: net: call notifiers for mtu change even if iface is not up Do the same thing as in set mac. Call notifiers every time. Signed-off-by: Jiri Pirko Acked-by: Neil Horman Signed-off-by: David S. 
Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 0aea3fee7f6d..307142a702d5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4971,7 +4971,7 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) else dev->mtu = new_mtu; - if (!err && dev->flags & IFF_UP) + if (!err) call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); return err; } -- cgit v1.2.3 From fc70fb640b159f1d6bf5ad2321cd55e874c8d1b8 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 7 Dec 2012 14:14:15 +0000 Subject: net: Handle encapsulated offloads before fragmentation or handing to lower dev This change allows the VXLAN to enable Tx checksum offloading even on devices that do not support encapsulated checksum offloads. The advantage to this is that it allows for the lower device to change due to routing table changes without impacting features on the VXLAN itself. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/core/dev.c | 15 +++++++++++++-- net/ipv4/ip_output.c | 4 ++++ 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 307142a702d5..a4c4a1bf07d5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2324,6 +2324,13 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, skb->vlan_tci = 0; } + /* If encapsulation offload request, verify we are testing + * hardware encapsulation features instead of standard + * features for the netdev + */ + if (skb->encapsulation) + features &= dev->hw_enc_features; + if (netif_needs_gso(skb, features)) { if (unlikely(dev_gso_segment(skb, features))) goto out_kfree_skb; @@ -2339,8 +2346,12 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, * checksumming here. 
*/ if (skb->ip_summed == CHECKSUM_PARTIAL) { - skb_set_transport_header(skb, - skb_checksum_start_offset(skb)); + if (skb->encapsulation) + skb_set_inner_transport_header(skb, + skb_checksum_start_offset(skb)); + else + skb_set_transport_header(skb, + skb_checksum_start_offset(skb)); if (!(features & NETIF_F_ALL_CSUM) && skb_checksum_help(skb)) goto out_kfree_skb; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 6537a408a4fb..3e98ed2bff55 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -595,6 +595,10 @@ slow_path_clean: } slow_path: + /* for offloaded checksums cleanup checksum before fragmentation */ + if ((skb->ip_summed == CHECKSUM_PARTIAL) && skb_checksum_help(skb)) + goto fail; + left = skb->len - hlen; /* Space per frame */ ptr = hlen; /* Where to start from */ -- cgit v1.2.3 From 89c5fa3369a47db0df904c45c1c26e64c0404430 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 10 Dec 2012 13:28:16 +0000 Subject: net: gro: dev_gro_receive() cleanup __napi_gro_receive() is inlined from two call sites for no good reason. Let's move the prep stuff into a function of its own, called only if/when needed. This saves 300 bytes on x86 : # size net/core/dev.o.after net/core/dev.o.before text data bss dec hex filename 51968 1238 1040 54246 d3e6 net/core/dev.o.before 51664 1238 1040 53942 d2b6 net/core/dev.o.after Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/dev.c | 52 ++++++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index a4c4a1bf07d5..47838509f5fd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3603,6 +3603,28 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old) } EXPORT_SYMBOL(napi_gro_flush); +static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) +{ + struct sk_buff *p; + unsigned int maclen = skb->dev->hard_header_len; + + for (p = napi->gro_list; p; p = p->next) { + unsigned long diffs; + + diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; + diffs |= p->vlan_tci ^ skb->vlan_tci; + if (maclen == ETH_HLEN) + diffs |= compare_ether_header(skb_mac_header(p), + skb_gro_mac_header(skb)); + else if (!diffs) + diffs = memcmp(skb_mac_header(p), + skb_gro_mac_header(skb), + maclen); + NAPI_GRO_CB(p)->same_flow = !diffs; + NAPI_GRO_CB(p)->flush = 0; + } +} + static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff **pp = NULL; @@ -3619,6 +3641,8 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff if (skb_is_gso(skb) || skb_has_frag_list(skb)) goto normal; + gro_list_prepare(napi, skb); + rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { if (ptype->type != type || !ptype->callbacks.gro_receive) @@ -3695,30 +3719,6 @@ normal: goto pull; } -static inline gro_result_t -__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) -{ - struct sk_buff *p; - unsigned int maclen = skb->dev->hard_header_len; - - for (p = napi->gro_list; p; p = p->next) { - unsigned long diffs; - - diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; - diffs |= p->vlan_tci ^ skb->vlan_tci; - if (maclen == ETH_HLEN) - diffs |= compare_ether_header(skb_mac_header(p), - skb_gro_mac_header(skb)); - else if (!diffs) - diffs = memcmp(skb_mac_header(p), - 
skb_gro_mac_header(skb), - maclen); - NAPI_GRO_CB(p)->same_flow = !diffs; - NAPI_GRO_CB(p)->flush = 0; - } - - return dev_gro_receive(napi, skb); -} static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) { @@ -3768,7 +3768,7 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { skb_gro_reset_offset(skb); - return napi_skb_finish(__napi_gro_receive(napi, skb), skb); + return napi_skb_finish(dev_gro_receive(napi, skb), skb); } EXPORT_SYMBOL(napi_gro_receive); @@ -3866,7 +3866,7 @@ gro_result_t napi_gro_frags(struct napi_struct *napi) if (!skb) return GRO_DROP; - return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb)); + return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb)); } EXPORT_SYMBOL(napi_gro_frags); -- cgit v1.2.3