From 37ed9493699cc5dafe1b8725858ef73176fdc9d2 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Thu, 5 Mar 2015 21:21:13 -0800 Subject: rtnetlink: add RTNH_F_EXTERNAL flag for fib offload Add new RTNH_F_EXTERNAL flag to mark fib entries offloaded externally, for example to a switchdev switch device. Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 06f75a407f74..c3722b024e73 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -334,6 +334,7 @@ struct rtnexthop { #define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */ #define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */ #define RTNH_F_ONLINK 4 /* Gateway is forced on link */ +#define RTNH_F_EXTERNAL 8 /* Route installed externally */ /* Macros to handle hexthops */ -- cgit v1.2.3 From 4586f1bb911ce219a4bc1c2a9d6eee2e058b2b51 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Thu, 5 Mar 2015 21:21:14 -0800 Subject: netdevice: add IPv4 fib add/del ops Add two new ndo ops for IPv4 fib offload support, add and del. Add uses modify semantics if the fib entry is already offloaded. Drivers implementing the new ndo ops will return err < 0 if programming the device fails, for example if the device's tables are full. Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- include/linux/netdevice.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 625c8d71511b..45413784a3b1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -768,6 +768,8 @@ struct netdev_phys_item_id { typedef u16 (*select_queue_fallback_t)(struct net_device *dev, struct sk_buff *skb); +struct fib_info; + /* * This structure defines the management hooks for network devices. * The following hooks can be defined; unless noted otherwise, they are @@ -1031,6 +1033,14 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * int (*ndo_switch_port_stp_update)(struct net_device *dev, u8 state); * Called to notify switch device port of bridge port STP * state change. + * int (*ndo_switch_fib_ipv4_add)(struct net_device *dev, __be32 dst, + * int dst_len, struct fib_info *fi, + * u8 tos, u8 type, u32 tb_id); + * Called to add/modify IPv4 route to switch device. + * int (*ndo_switch_fib_ipv4_del)(struct net_device *dev, __be32 dst, + * int dst_len, struct fib_info *fi, + * u8 tos, u8 type, u32 tb_id); + * Called to delete IPv4 route from switch device. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1192,6 +1202,18 @@ struct net_device_ops { struct netdev_phys_item_id *psid); int (*ndo_switch_port_stp_update)(struct net_device *dev, u8 state); + int (*ndo_switch_fib_ipv4_add)(struct net_device *dev, + __be32 dst, + int dst_len, + struct fib_info *fi, + u8 tos, u8 type, + u32 tb_id); + int (*ndo_switch_fib_ipv4_del)(struct net_device *dev, + __be32 dst, + int dst_len, + struct fib_info *fi, + u8 tos, u8 type, + u32 tb_id); #endif }; -- cgit v1.2.3 From 5e8d90497d65f528c54015644095ace6e330fd8e Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Thu, 5 Mar 2015 21:21:15 -0800 Subject: switchdev: add IPv4 fib ndo ops wrappers Add IPv4 fib ndo wrapper funcs and stub them out for now. Signed-off-by: Scott Feldman Signed-off-by: David S.
Miller --- include/net/switchdev.h | 19 +++++++++++++++++++ net/switchdev/switchdev.c | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index cfcdac2e5d25..8d2ac663325a 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -51,6 +51,11 @@ int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags); int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags); +int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, + u8 tos, u8 type, u32 tb_id); +int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, + u8 tos, u8 type, u32 tb_id); + #else static inline int netdev_switch_parent_id_get(struct net_device *dev, @@ -109,6 +114,20 @@ static inline int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device * return 0; } +static inline int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, + struct fib_info *fi, + u8 tos, u8 type, u32 tb_id) +{ + return 0; +} + +static inline int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, + struct fib_info *fi, + u8 tos, u8 type, u32 tb_id) +{ + return 0; +} + #endif #endif /* _LINUX_SWITCHDEV_H_ */ diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 8c1e558db118..3c090f8d071b 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -14,6 +14,7 @@ #include #include #include +#include #include /** @@ -225,3 +226,41 @@ int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev, return ret; } EXPORT_SYMBOL(ndo_dflt_netdev_switch_port_bridge_dellink); + +/** + * netdev_switch_fib_ipv4_add - Add IPv4 route entry to switch + * + * @dst: route's IPv4 destination address + * @dst_len: destination address length (prefix length) + * @fi: route FIB info structure + * @tos: route TOS + * @type: route type + * @tb_id: route table ID + * + * Add IPv4 route entry to switch device. + */ +int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, + u8 tos, u8 type, u32 tb_id) +{ + return 0; +} +EXPORT_SYMBOL(netdev_switch_fib_ipv4_add); + +/** + * netdev_switch_fib_ipv4_del - Delete IPv4 route entry from switch + * + * @dst: route's IPv4 destination address + * @dst_len: destination address length (prefix length) + * @fi: route FIB info structure + * @tos: route TOS + * @type: route type + * @tb_id: route table ID + * + * Delete IPv4 route entry from switch device. + */ +int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, + u8 tos, u8 type, u32 tb_id) +{ + return 0; +} +EXPORT_SYMBOL(netdev_switch_fib_ipv4_del); -- cgit v1.2.3 From 104616e74e0b464d449fdd2ee2f547d2fad71610 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Thu, 5 Mar 2015 21:21:16 -0800 Subject: switchdev: don't support custom ip rules, for now Keep switchdev FIB offload model simple for now and don't allow custom ip rules. Signed-off-by: Scott Feldman Signed-off-by: David S. 
Miller --- include/net/ip_fib.h | 2 ++ net/ipv4/fib_frontend.c | 13 ++++++++++ net/ipv4/fib_rules.c | 3 +++ net/ipv4/fib_trie.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++ net/switchdev/switchdev.c | 4 ++++ 5 files changed, 83 insertions(+) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 825cb2800908..1657604c5dd3 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -196,6 +196,7 @@ int fib_table_delete(struct fib_table *, struct fib_config *); int fib_table_dump(struct fib_table *table, struct sk_buff *skb, struct netlink_callback *cb); int fib_table_flush(struct fib_table *table); +void fib_table_flush_external(struct fib_table *table); void fib_free_table(struct fib_table *tb); @@ -308,6 +309,7 @@ static inline int fib_num_tclassid_users(struct net *net) return 0; } #endif +void fib_flush_external(struct net *net); /* Exported by fib_semantics.c */ int ip_fib_check_default(__be32 gw, struct net_device *dev); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 220c4b4af4cf..e067770235bf 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -144,6 +144,19 @@ static void fib_flush(struct net *net) rt_cache_flush(net); } +void fib_flush_external(struct net *net) +{ + struct fib_table *tb; + struct hlist_head *head; + unsigned int h; + + for (h = 0; h < FIB_TABLE_HASHSZ; h++) { + head = &net->ipv4.fib_table_hash[h]; + hlist_for_each_entry(tb, head, tb_hlist) + fib_table_flush_external(tb); + } +} + /* * Find address type as if only "dev" was present in the system. If * on_dev is NULL then all interfaces are taken into consideration. diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index d3db718be51d..190d0d00d744 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -209,6 +209,8 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, rule4->tos = frh->tos; net->ipv4.fib_has_custom_rules = true; + fib_flush_external(rule->fr_net); + err = 0; errout: return err; @@ -224,6 +226,7 @@ static void fib4_rule_delete(struct fib_rule *rule) net->ipv4.fib_num_tclassid_users--; #endif net->ipv4.fib_has_custom_rules = true; + fib_flush_external(rule->fr_net); } static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index fae34ad4bb1a..2de43956c9d0 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1536,6 +1536,67 @@ found: return n; } +/* Caller must hold RTNL */ +void fib_table_flush_external(struct fib_table *tb) +{ + struct trie *t = (struct trie *)tb->tb_data; + struct fib_alias *fa; + struct tnode *n, *pn; + unsigned long cindex; + unsigned char slen; + int found = 0; + + n = rcu_dereference(t->trie); + if (!n) + return; + + pn = NULL; + cindex = 0; + + while (IS_TNODE(n)) { + /* record pn and cindex for leaf walking */ + pn = n; + cindex = 1ul << n->bits; +backtrace: + /* walk trie in reverse order */ + do { + while (!(cindex--)) { + t_key pkey = pn->key; + + n = pn; + pn = node_parent(n); + + /* resize completed node */ + resize(t, n); + + /* if we got the root we are done */ + if (!pn) + return; + + cindex = get_index(pkey, pn); + } + + /* grab the next available node */ + n = tnode_get_child(pn, cindex); + } while (!n); + } + + hlist_for_each_entry(fa, &n->leaf, fa_list) { + struct fib_info *fi = fa->fa_info; + + if (fi && (fi->fib_flags & RTNH_F_EXTERNAL)) { + netdev_switch_fib_ipv4_del(n->key, + KEYLENGTH - fa->fa_slen, + fi, fa->fa_tos, + fa->fa_type, tb->tb_id); + } + } + + /* if trie is leaf 
only loop is completed */ + if (pn) + goto backtrace; +} + /* Caller must hold RTNL. */ int fib_table_flush(struct fib_table *tb) { diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 3c090f8d071b..81c4c0274b9b 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -242,6 +242,10 @@ EXPORT_SYMBOL(ndo_dflt_netdev_switch_port_bridge_dellink); int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, u8 tos, u8 type, u32 tb_id) { + /* Don't offload route if using custom ip rules */ + if (fi->fib_net->ipv4.fib_has_custom_rules) + return 0; + return 0; } EXPORT_SYMBOL(netdev_switch_fib_ipv4_add); -- cgit v1.2.3 From b5d6fbdeede861b52d67b9a4ea3fdfcc6e6865cd Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Thu, 5 Mar 2015 21:21:17 -0800 Subject: switchdev: implement IPv4 fib ndo wrappers Flesh out ndo wrappers to call into device driver. To call into the device driver, the wrapper must iterate over the route's nexthops to ensure all nexthop devs belong to the same switch device. Currently, there is no support for route's nexthops spanning offloaded and non-offloaded devices, or spanning ports of multiple offload devices. Since switch device ports may be stacked under virtual interfaces (bonds and/or bridges), and the route's nexthop may be on the virtual interface, the wrapper will traverse the nexthop dev down to the base dev. It's the base dev that's passed to the switchdev driver's ndo ops. Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- net/switchdev/switchdev.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 96 insertions(+), 2 deletions(-) diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 81c4c0274b9b..99907d829419 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -227,6 +227,65 @@ int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev, } EXPORT_SYMBOL(ndo_dflt_netdev_switch_port_bridge_dellink); +static struct net_device *netdev_switch_get_lowest_dev(struct net_device *dev) +{ + const struct net_device_ops *ops = dev->netdev_ops; + struct net_device *lower_dev; + struct net_device *port_dev; + struct list_head *iter; + + /* Recursively search down until we find a sw port dev. + * (A sw port dev supports ndo_switch_parent_id_get). + */ + + if (dev->features & NETIF_F_HW_SWITCH_OFFLOAD && + ops->ndo_switch_parent_id_get) + return dev; + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + port_dev = netdev_switch_get_lowest_dev(lower_dev); + if (port_dev) + return port_dev; + } + + return NULL; +} + +static struct net_device *netdev_switch_get_dev_by_nhs(struct fib_info *fi) +{ + struct netdev_phys_item_id psid; + struct netdev_phys_item_id prev_psid; + struct net_device *dev = NULL; + int nhsel; + + /* For this route, all nexthop devs must be on the same switch.
*/ + for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { + const struct fib_nh *nh = &fi->fib_nh[nhsel]; + + if (!nh->nh_dev) + return NULL; + + dev = netdev_switch_get_lowest_dev(nh->nh_dev); + if (!dev) + return NULL; + + if (netdev_switch_parent_id_get(dev, &psid)) + return NULL; + + if (nhsel > 0) { + if (prev_psid.id_len != psid.id_len) + return NULL; + if (memcmp(prev_psid.id, psid.id, psid.id_len)) + return NULL; + } + + prev_psid = psid; + } + + return dev; +} + /** * netdev_switch_fib_ipv4_add - Add IPv4 route entry to switch * @@ -242,11 +301,27 @@ EXPORT_SYMBOL(ndo_dflt_netdev_switch_port_bridge_dellink); int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, u8 tos, u8 type, u32 tb_id) { + struct net_device *dev; + const struct net_device_ops *ops; + int err = 0; + /* Don't offload route if using custom ip rules */ if (fi->fib_net->ipv4.fib_has_custom_rules) return 0; - return 0; + dev = netdev_switch_get_dev_by_nhs(fi); + if (!dev) + return 0; + ops = dev->netdev_ops; + + if (ops->ndo_switch_fib_ipv4_add) { + err = ops->ndo_switch_fib_ipv4_add(dev, htonl(dst), dst_len, + fi, tos, type, tb_id); + if (!err) + fi->fib_flags |= RTNH_F_EXTERNAL; + } + + return err; } EXPORT_SYMBOL(netdev_switch_fib_ipv4_add); @@ -265,6 +340,25 @@ EXPORT_SYMBOL(netdev_switch_fib_ipv4_add); int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, u8 tos, u8 type, u32 tb_id) { - return 0; + struct net_device *dev; + const struct net_device_ops *ops; + int err = 0; + + if (!(fi->fib_flags & RTNH_F_EXTERNAL)) + return 0; + + dev = netdev_switch_get_dev_by_nhs(fi); + if (!dev) + return 0; + ops = dev->netdev_ops; + + if (ops->ndo_switch_fib_ipv4_del) { + err = ops->ndo_switch_fib_ipv4_del(dev, htonl(dst), dst_len, + fi, tos, type, tb_id); + if (!err) + fi->fib_flags &= ~RTNH_F_EXTERNAL; + } + + return err; } EXPORT_SYMBOL(netdev_switch_fib_ipv4_del); -- cgit v1.2.3 From 448b128a14501543748514a4f9adedd3c0da2e85 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Thu, 5 Mar 2015 21:21:18 -0800 Subject: ipv4: add net bool fib_offload_disabled If something goes wrong with IPv4 FIB offload, mark entire net offload disabled. This is a brute-force policy to basically shut down IPv4 FIB offload permanently if there is a problem offloading any route to an external device. We can refine the policy in the future, to handle failures on a per-device or per-route basis, but for now, this policy is per-net. What we're trying to avoid is an inconsistent split between the kernel's FIB and the offload device's FIB. We don't want the device to fwd a pkt inconsistent with what the kernel would do. An example of a split: if the device has 10.0.0.0/16 and the kernel has 10.0.0.0/16 and 10.0.0.0/24, the device wouldn't see the longest prefix 10.0.0.0/24 and could potentially forward pkts incorrectly. Limited capacity or limited capability are two ways a route may fail to install to the offload device. We'll not differentiate between failures at this time, and treat any failure as fatal and mark the net as fib_offload_disabled. Signed-off-by: Scott Feldman Signed-off-by: David S. Miller
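As a concrete illustration of the failure mode this policy handles (a sketch, not part of the patch): the earlier netdevice patch says a driver returns err < 0 when it cannot program a route, for example because its tables are full, and a later patch in this series reacts to that error by flushing all offloaded routes and setting fib_offload_disabled. A hypothetical driver-side check might look like the following; the my_sw_* names and MY_SW_MAX_ROUTES limit are made up for illustration, only the ndo signature comes from the series.

static int my_sw_port_fib_ipv4_add(struct net_device *dev, __be32 dst,
				   int dst_len, struct fib_info *fi,
				   u8 tos, u8 type, u32 tb_id)
{
	struct my_sw_port *port = netdev_priv(dev);	/* hypothetical priv */

	/* Capacity failure: the negative return propagates up through
	 * netdev_switch_fib_ipv4_add(); the following patch responds by
	 * calling netdev_switch_fib_ipv4_abort(), which flushes offloaded
	 * routes and sets fib_offload_disabled for the net.
	 */
	if (port->sw->num_routes >= MY_SW_MAX_ROUTES)	/* hypothetical limit */
		return -ENOSPC;

	return my_sw_program_lpm_entry(port, dst, dst_len, fi);	/* hypothetical */
}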
--- include/net/netns/ipv4.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index db1db158a00e..1085e12f940f 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -47,6 +47,7 @@ struct netns_ipv4 { int fib_num_tclassid_users; #endif struct hlist_head *fib_table_hash; + bool fib_offload_disabled; struct sock *fibnl; struct sock * __percpu *icmp_sk; -- cgit v1.2.3 From 8e05fd7166c6123334b7a739a697d677747aa462 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Thu, 5 Mar 2015 21:21:19 -0800 Subject: fib: hook IPv4 fib for hardware offload Call into the switchdev driver any time an IPv4 fib entry is added to, modified in, or deleted from the kernel's FIB. The switchdev driver may or may not install the route to the offload device. If the driver tries to install the route and something goes wrong (device's routing table is full, etc.), then all of the offloaded routes are flushed from the device, route forwarding falls back to the kernel, and no more routes are offloaded. We can refine this logic later. For now, use the simplest model of offloading routes up to the point of failure, and then on failure, undo everything and mark IPv4 offloading disabled. Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- include/net/switchdev.h | 5 +++++ net/ipv4/fib_trie.c | 31 ++++++++++++++++++++++++++++++- net/switchdev/switchdev.c | 28 ++++++++++++++++++++++++++-- 3 files changed, 61 insertions(+), 3 deletions(-) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 8d2ac663325a..dc0a5cc7c2c5 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -55,6 +55,7 @@ int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, u8 tos, u8 type, u32 tb_id); int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, u8 tos, u8 type, u32 tb_id); +void netdev_switch_fib_ipv4_abort(struct fib_info *fi); #else @@ -128,6 +129,10 @@ static inline int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, return 0; } +void netdev_switch_fib_ipv4_abort(struct fib_info *fi) +{ +} + #endif #endif /* _LINUX_SWITCHDEV_H_ */ diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 2de43956c9d0..6544f1a0cfa1 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -79,6 +79,7 @@ #include #include #include +#include #include "fib_lookup.h" #define MAX_STAT_DEPTH 32 @@ -1135,7 +1136,18 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) new_fa->fa_state = state & ~FA_S_ACCESSED; new_fa->fa_slen = fa->fa_slen; + err = netdev_switch_fib_ipv4_add(key, plen, fi, + new_fa->fa_tos, + cfg->fc_type, + tb->tb_id); + if (err) { + netdev_switch_fib_ipv4_abort(fi); + kmem_cache_free(fn_alias_kmem, new_fa); + goto out; + } + hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list); + alias_free_mem_rcu(fa); fib_release_info(fi_drop); @@ -1171,10 +1183,18 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) new_fa->fa_state = 0; new_fa->fa_slen = slen; + /* (Optionally) offload fib entry to switch hardware. */ + err = netdev_switch_fib_ipv4_add(key, plen, fi, tos, + cfg->fc_type, tb->tb_id); + if (err) { + netdev_switch_fib_ipv4_abort(fi); + goto out_free_new_fa; + } + /* Insert new entry to the list.
*/ + err = fib_insert_alias(t, tp, l, new_fa, fa, key); + if (err) - goto out_free_new_fa; + goto out_sw_fib_del; if (!plen) tb->tb_num_default++; @@ -1185,6 +1205,8 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) succeeded: return 0; +out_sw_fib_del: + netdev_switch_fib_ipv4_del(key, plen, fi, tos, cfg->fc_type, tb->tb_id); out_free_new_fa: kmem_cache_free(fn_alias_kmem, new_fa); out: @@ -1456,6 +1478,9 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) if (!fa_to_delete) return -ESRCH; + netdev_switch_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos, + cfg->fc_type, tb->tb_id); + rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id, &cfg->fc_nlinfo, 0); @@ -1650,6 +1675,10 @@ backtrace: struct fib_info *fi = fa->fa_info; if (fi && (fi->fib_flags & RTNH_F_DEAD)) { + netdev_switch_fib_ipv4_del(n->key, + KEYLENGTH - fa->fa_slen, + fi, fa->fa_tos, + fa->fa_type, tb->tb_id); hlist_del_rcu(&fa->fa_list); fib_release_info(fa->fa_info); alias_free_mem_rcu(fa); diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 99907d829419..f4fd575aa2a3 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -305,8 +305,12 @@ int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, const struct net_device_ops *ops; int err = 0; - /* Don't offload route if using custom ip rules */ - if (fi->fib_net->ipv4.fib_has_custom_rules) + /* Don't offload route if using custom ip rules or if + * IPv4 FIB offloading has been disabled completely. + */ + + if (fi->fib_net->ipv4.fib_has_custom_rules | + fi->fib_net->ipv4.fib_offload_disabled) return 0; dev = netdev_switch_get_dev_by_nhs(fi); @@ -362,3 +366,23 @@ int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, return err; } EXPORT_SYMBOL(netdev_switch_fib_ipv4_del); + +/** + * netdev_switch_fib_ipv4_abort - Abort an IPv4 FIB operation + * + * @fi: route FIB info structure + */ +void netdev_switch_fib_ipv4_abort(struct fib_info *fi) +{ + /* There was a problem installing this route to the offload + * device. For now, until we come up with more refined + * policy handling, abruptly end IPv4 fib offloading for + * the entire net by flushing offload device(s) of all + * IPv4 routes, and mark IPv4 fib offloading broken from + * this point forward. + */ + + fib_flush_external(fi->fib_net); + fi->fib_net->ipv4.fib_offload_disabled = true; +} +EXPORT_SYMBOL(netdev_switch_fib_ipv4_abort); -- cgit v1.2.3 From c1beeef7a32a791a60e2adcc217d4461cd1e25d1 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Thu, 5 Mar 2015 21:21:20 -0800 Subject: rocker: implement IPv4 fib offloading The driver implements ndo_switch_fib_ipv4_add/del ops to add/del/mod IPv4 routes to/from the switchdev device. Once a route is added to the device, and the route's nexthops are resolved to neighbor MAC addresses, the device will forward matching pkts rather than the kernel. This offloads the L3 forwarding path from the kernel to the device. Note that control and management planes are still managed by Linux; only the data plane is offloaded. Standard routing control protocols such as OSPF and BGP run on Linux and manage the kernel's FIB via standard rtm netlink msgs...nothing changes here. A new hash table is added to rocker to track neighbors. The driver listens for neighbor update events using netevent notifier NETEVENT_NEIGH_UPDATE. Any ARP table updates for ports on this device are recorded in this table.
Routes installed to the device with nexthops that reference neighbors in this table are "qualified". In the case of a route with nexthops not resolved in the table, the kernel is asked to resolve the nexthop. The driver uses fib_info->fib_priority for the priority field in rocker's unicast routing table. The device can only forward to pkts matching route dst to resolved nexthops. Currently, the device only supports single-path routes (i.e. routes with one nexthop). Equal Cost Multipath (ECMP) route support will be added in followup patches. This patch is driver support for unicast IPv4 routing only. Followup patches will add driver and infrastructure for IPv6 routing and multicast routing. Signed-off-by: Scott Feldman Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker.c | 483 +++++++++++++++++++++++++++++++---- 1 file changed, 436 insertions(+), 47 deletions(-) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index a5d1e6ea7d58..d04d3b374e31 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -32,6 +32,9 @@ #include #include #include +#include +#include +#include #include #include @@ -111,9 +114,10 @@ struct rocker_flow_tbl_key { struct rocker_flow_tbl_entry { struct hlist_node entry; - u32 ref_count; + u32 cmd; u64 cookie; struct rocker_flow_tbl_key key; + size_t key_len; u32 key_crc32; /* key */ }; @@ -161,6 +165,16 @@ struct rocker_internal_vlan_tbl_entry { __be16 vlan_id; }; +struct rocker_neigh_tbl_entry { + struct hlist_node entry; + __be32 ip_addr; /* key */ + struct net_device *dev; + u32 ref_count; + u32 index; + u8 eth_dst[ETH_ALEN]; + bool ttl_check; +}; + struct rocker_desc_info { char *data; /* mapped */ size_t data_size; @@ -234,6 +248,9 @@ struct rocker { unsigned long internal_vlan_bitmap[ROCKER_INTERNAL_VLAN_BITMAP_LEN]; DECLARE_HASHTABLE(internal_vlan_tbl, 8); spinlock_t internal_vlan_tbl_lock; + DECLARE_HASHTABLE(neigh_tbl, 16); + spinlock_t neigh_tbl_lock; + u32 neigh_tbl_next_index; }; static const u8 zero_mac[ETH_ALEN] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; @@ -256,7 +273,6 @@ enum { ROCKER_PRIORITY_VLAN = 1, ROCKER_PRIORITY_TERM_MAC_UCAST = 0, ROCKER_PRIORITY_TERM_MAC_MCAST = 1, - ROCKER_PRIORITY_UNICAST_ROUTING = 1, ROCKER_PRIORITY_BRIDGING_VLAN_DFLT_EXACT = 1, ROCKER_PRIORITY_BRIDGING_VLAN_DFLT_WILD = 2, ROCKER_PRIORITY_BRIDGING_VLAN = 3, @@ -1940,8 +1956,7 @@ static int rocker_cmd_flow_tbl_add(struct rocker *rocker, struct rocker_tlv *cmd_info; int err = 0; - if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE, - ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD)) + if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE, entry->cmd)) return -EMSGSIZE; cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO); if (!cmd_info) @@ -1998,8 +2013,7 @@ static int rocker_cmd_flow_tbl_del(struct rocker *rocker, const struct rocker_flow_tbl_entry *entry = priv; struct rocker_tlv *cmd_info; - if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE, - ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL)) + if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE, entry->cmd)) return -EMSGSIZE; cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO); if (!cmd_info) @@ -2168,9 +2182,9 @@ static int rocker_cmd_group_tbl_del(struct rocker *rocker, return 0; } -/***************************************** - * Flow, group, FDB, internal VLAN tables - *****************************************/ +/*************************************************** + * Flow, group, FDB, 
internal VLAN and neigh tables + ***************************************************/ static int rocker_init_tbls(struct rocker *rocker) { @@ -2186,6 +2200,9 @@ static int rocker_init_tbls(struct rocker *rocker) hash_init(rocker->internal_vlan_tbl); spin_lock_init(&rocker->internal_vlan_tbl_lock); + hash_init(rocker->neigh_tbl); + spin_lock_init(&rocker->neigh_tbl_lock); + return 0; } @@ -2196,6 +2213,7 @@ static void rocker_free_tbls(struct rocker *rocker) struct rocker_group_tbl_entry *group_entry; struct rocker_fdb_tbl_entry *fdb_entry; struct rocker_internal_vlan_tbl_entry *internal_vlan_entry; + struct rocker_neigh_tbl_entry *neigh_entry; struct hlist_node *tmp; int bkt; @@ -2219,16 +2237,22 @@ static void rocker_free_tbls(struct rocker *rocker) tmp, internal_vlan_entry, entry) hash_del(&internal_vlan_entry->entry); spin_unlock_irqrestore(&rocker->internal_vlan_tbl_lock, flags); + + spin_lock_irqsave(&rocker->neigh_tbl_lock, flags); + hash_for_each_safe(rocker->neigh_tbl, bkt, tmp, neigh_entry, entry) + hash_del(&neigh_entry->entry); + spin_unlock_irqrestore(&rocker->neigh_tbl_lock, flags); } static struct rocker_flow_tbl_entry * rocker_flow_tbl_find(struct rocker *rocker, struct rocker_flow_tbl_entry *match) { struct rocker_flow_tbl_entry *found; + size_t key_len = match->key_len ? match->key_len : sizeof(found->key); hash_for_each_possible(rocker->flow_tbl, found, entry, match->key_crc32) { - if (memcmp(&found->key, &match->key, sizeof(found->key)) == 0) + if (memcmp(&found->key, &match->key, key_len) == 0) return found; } @@ -2241,42 +2265,34 @@ static int rocker_flow_tbl_add(struct rocker_port *rocker_port, { struct rocker *rocker = rocker_port->rocker; struct rocker_flow_tbl_entry *found; + size_t key_len = match->key_len ? match->key_len : sizeof(found->key); unsigned long flags; - bool add_to_hw = false; - int err = 0; - match->key_crc32 = crc32(~0, &match->key, sizeof(match->key)); + match->key_crc32 = crc32(~0, &match->key, key_len); spin_lock_irqsave(&rocker->flow_tbl_lock, flags); found = rocker_flow_tbl_find(rocker, match); if (found) { - kfree(match); + match->cookie = found->cookie; + hash_del(&found->entry); + kfree(found); + found = match; + found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD; } else { found = match; found->cookie = rocker->flow_tbl_next_cookie++; - hash_add(rocker->flow_tbl, &found->entry, found->key_crc32); - add_to_hw = true; + found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD; } - found->ref_count++; + hash_add(rocker->flow_tbl, &found->entry, found->key_crc32); spin_unlock_irqrestore(&rocker->flow_tbl_lock, flags); - if (add_to_hw) { - err = rocker_cmd_exec(rocker, rocker_port, - rocker_cmd_flow_tbl_add, - found, NULL, NULL, nowait); - if (err) { - spin_lock_irqsave(&rocker->flow_tbl_lock, flags); - hash_del(&found->entry); - spin_unlock_irqrestore(&rocker->flow_tbl_lock, flags); - kfree(found); - } - } - - return err; + return rocker_cmd_exec(rocker, rocker_port, + rocker_cmd_flow_tbl_add, + found, NULL, NULL, nowait); } static int rocker_flow_tbl_del(struct rocker_port *rocker_port, @@ -2285,29 +2301,26 @@ static int rocker_flow_tbl_del(struct rocker_port *rocker_port, { struct rocker *rocker = rocker_port->rocker; struct rocker_flow_tbl_entry *found; + size_t key_len = match->key_len ? 
match->key_len : sizeof(found->key); unsigned long flags; - bool del_from_hw = false; int err = 0; - match->key_crc32 = crc32(~0, &match->key, sizeof(match->key)); + match->key_crc32 = crc32(~0, &match->key, key_len); spin_lock_irqsave(&rocker->flow_tbl_lock, flags); found = rocker_flow_tbl_find(rocker, match); if (found) { - found->ref_count--; - if (found->ref_count == 0) { - hash_del(&found->entry); - del_from_hw = true; - } + hash_del(&found->entry); + found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL; } spin_unlock_irqrestore(&rocker->flow_tbl_lock, flags); kfree(match); - if (del_from_hw) { + if (found) { err = rocker_cmd_exec(rocker, rocker_port, rocker_cmd_flow_tbl_del, found, NULL, NULL, nowait); @@ -2467,6 +2480,31 @@ static int rocker_flow_tbl_bridge(struct rocker_port *rocker_port, return rocker_flow_tbl_do(rocker_port, flags, entry); } +static int rocker_flow_tbl_ucast4_routing(struct rocker_port *rocker_port, + __be16 eth_type, __be32 dst, + __be32 dst_mask, u32 priority, + enum rocker_of_dpa_table_id goto_tbl, + u32 group_id, int flags) +{ + struct rocker_flow_tbl_entry *entry; + + entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags)); + if (!entry) + return -ENOMEM; + + entry->key.tbl_id = ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING; + entry->key.priority = priority; + entry->key.ucast_routing.eth_type = eth_type; + entry->key.ucast_routing.dst4 = dst; + entry->key.ucast_routing.dst4_mask = dst_mask; + entry->key.ucast_routing.goto_tbl = goto_tbl; + entry->key.ucast_routing.group_id = group_id; + entry->key_len = offsetof(struct rocker_flow_tbl_key, + ucast_routing.group_id); + + return rocker_flow_tbl_do(rocker_port, flags, entry); +} + static int rocker_flow_tbl_acl(struct rocker_port *rocker_port, int flags, u32 in_pport, u32 in_pport_mask, @@ -2554,7 +2592,6 @@ static int rocker_group_tbl_add(struct rocker_port *rocker_port, struct rocker *rocker = rocker_port->rocker; struct rocker_group_tbl_entry *found; unsigned long flags; - int err = 0; spin_lock_irqsave(&rocker->group_tbl_lock, flags); @@ -2574,12 +2611,9 @@ static int rocker_group_tbl_add(struct rocker_port *rocker_port, spin_unlock_irqrestore(&rocker->group_tbl_lock, flags); - if (found->cmd) - err = rocker_cmd_exec(rocker, rocker_port, - rocker_cmd_group_tbl_add, - found, NULL, NULL, nowait); - - return err; + return rocker_cmd_exec(rocker, rocker_port, + rocker_cmd_group_tbl_add, + found, NULL, NULL, nowait); } static int rocker_group_tbl_del(struct rocker_port *rocker_port, @@ -2675,6 +2709,244 @@ static int rocker_group_l2_flood(struct rocker_port *rocker_port, group_id); } +static int rocker_group_l3_unicast(struct rocker_port *rocker_port, + int flags, u32 index, u8 *src_mac, + u8 *dst_mac, __be16 vlan_id, + bool ttl_check, u32 pport) +{ + struct rocker_group_tbl_entry *entry; + + entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags)); + if (!entry) + return -ENOMEM; + + entry->group_id = ROCKER_GROUP_L3_UNICAST(index); + if (src_mac) + ether_addr_copy(entry->l3_unicast.eth_src, src_mac); + if (dst_mac) + ether_addr_copy(entry->l3_unicast.eth_dst, dst_mac); + entry->l3_unicast.vlan_id = vlan_id; + entry->l3_unicast.ttl_check = ttl_check; + entry->l3_unicast.group_id = ROCKER_GROUP_L2_INTERFACE(vlan_id, pport); + + return rocker_group_tbl_do(rocker_port, flags, entry); +} + +static struct rocker_neigh_tbl_entry * + rocker_neigh_tbl_find(struct rocker *rocker, __be32 ip_addr) +{ + struct rocker_neigh_tbl_entry *found; + + hash_for_each_possible(rocker->neigh_tbl, found, entry, ip_addr) + if 
(found->ip_addr == ip_addr) + return found; + + return NULL; +} + +static void _rocker_neigh_add(struct rocker *rocker, + struct rocker_neigh_tbl_entry *entry) +{ + entry->index = rocker->neigh_tbl_next_index++; + entry->ref_count++; + hash_add(rocker->neigh_tbl, &entry->entry, entry->ip_addr); +} + +static void _rocker_neigh_del(struct rocker *rocker, + struct rocker_neigh_tbl_entry *entry) +{ + if (--entry->ref_count == 0) { + hash_del(&entry->entry); + kfree(entry); + } +} + +static void _rocker_neigh_update(struct rocker *rocker, + struct rocker_neigh_tbl_entry *entry, + u8 *eth_dst, bool ttl_check) +{ + if (eth_dst) { + ether_addr_copy(entry->eth_dst, eth_dst); + entry->ttl_check = ttl_check; + } else { + entry->ref_count++; + } +} + +static int rocker_port_ipv4_neigh(struct rocker_port *rocker_port, + int flags, __be32 ip_addr, u8 *eth_dst) +{ + struct rocker *rocker = rocker_port->rocker; + struct rocker_neigh_tbl_entry *entry; + struct rocker_neigh_tbl_entry *found; + unsigned long lock_flags; + __be16 eth_type = htons(ETH_P_IP); + enum rocker_of_dpa_table_id goto_tbl = + ROCKER_OF_DPA_TABLE_ID_ACL_POLICY; + u32 group_id; + u32 priority = 0; + bool adding = !(flags & ROCKER_OP_FLAG_REMOVE); + bool updating; + bool removing; + int err = 0; + + entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags)); + if (!entry) + return -ENOMEM; + + spin_lock_irqsave(&rocker->neigh_tbl_lock, lock_flags); + + found = rocker_neigh_tbl_find(rocker, ip_addr); + + updating = found && adding; + removing = found && !adding; + adding = !found && adding; + + if (adding) { + entry->ip_addr = ip_addr; + entry->dev = rocker_port->dev; + ether_addr_copy(entry->eth_dst, eth_dst); + entry->ttl_check = true; + _rocker_neigh_add(rocker, entry); + } else if (removing) { + memcpy(entry, found, sizeof(*entry)); + _rocker_neigh_del(rocker, found); + } else if (updating) { + _rocker_neigh_update(rocker, found, eth_dst, true); + memcpy(entry, found, sizeof(*entry)); + } else { + err = -ENOENT; + } + + spin_unlock_irqrestore(&rocker->neigh_tbl_lock, lock_flags); + + if (err) + goto err_out; + + /* For each active neighbor, we have an L3 unicast group and + * a /32 route to the neighbor, which uses the L3 unicast + * group. The L3 unicast group can also be referred to by + * other routes' nexthops. + */ + + err = rocker_group_l3_unicast(rocker_port, flags, + entry->index, + rocker_port->dev->dev_addr, + entry->eth_dst, + rocker_port->internal_vlan_id, + entry->ttl_check, + rocker_port->pport); + if (err) { + netdev_err(rocker_port->dev, + "Error (%d) L3 unicast group index %d\n", + err, entry->index); + goto err_out; + } + + if (adding || removing) { + group_id = ROCKER_GROUP_L3_UNICAST(entry->index); + err = rocker_flow_tbl_ucast4_routing(rocker_port, + eth_type, ip_addr, + inet_make_mask(32), + priority, goto_tbl, + group_id, flags); + + if (err) + netdev_err(rocker_port->dev, + "Error (%d) /32 unicast route %pI4 group 0x%08x\n", + err, &entry->ip_addr, group_id); + } + +err_out: + if (!adding) + kfree(entry); + + return err; +} + +static int rocker_port_ipv4_resolve(struct rocker_port *rocker_port, + __be32 ip_addr) +{ + struct net_device *dev = rocker_port->dev; + struct neighbour *n = __ipv4_neigh_lookup(dev, ip_addr); + int err = 0; + + if (!n) + n = neigh_create(&arp_tbl, &ip_addr, dev); + if (!n) + return -ENOMEM; + + /* If the neigh is already resolved, then go ahead and + * install the entry, otherwise start the ARP process to + * resolve the neigh. 
+ */ + + if (n->nud_state & NUD_VALID) + err = rocker_port_ipv4_neigh(rocker_port, 0, ip_addr, n->ha); + else + neigh_event_send(n, NULL); + + return err; +} + +static int rocker_port_ipv4_nh(struct rocker_port *rocker_port, int flags, + __be32 ip_addr, u32 *index) +{ + struct rocker *rocker = rocker_port->rocker; + struct rocker_neigh_tbl_entry *entry; + struct rocker_neigh_tbl_entry *found; + unsigned long lock_flags; + bool adding = !(flags & ROCKER_OP_FLAG_REMOVE); + bool updating; + bool removing; + bool resolved = true; + int err = 0; + + entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags)); + if (!entry) + return -ENOMEM; + + spin_lock_irqsave(&rocker->neigh_tbl_lock, lock_flags); + + found = rocker_neigh_tbl_find(rocker, ip_addr); + if (found) + *index = found->index; + + updating = found && adding; + removing = found && !adding; + adding = !found && adding; + + if (adding) { + entry->ip_addr = ip_addr; + entry->dev = rocker_port->dev; + _rocker_neigh_add(rocker, entry); + *index = entry->index; + resolved = false; + } else if (removing) { + _rocker_neigh_del(rocker, found); + } else if (updating) { + _rocker_neigh_update(rocker, found, NULL, false); + resolved = !is_zero_ether_addr(found->eth_dst); + } else { + err = -ENOENT; + } + + spin_unlock_irqrestore(&rocker->neigh_tbl_lock, lock_flags); + + if (!adding) + kfree(entry); + + if (err) + return err; + + /* Resolved means neigh ip_addr is resolved to neigh mac. */ + + if (!resolved) + err = rocker_port_ipv4_resolve(rocker_port, ip_addr); + + return err; +} + static int rocker_port_vlan_flood_group(struct rocker_port *rocker_port, int flags, __be16 vlan_id) { @@ -3429,6 +3701,51 @@ not_found: spin_unlock_irqrestore(&rocker->internal_vlan_tbl_lock, lock_flags); } +static int rocker_port_fib_ipv4(struct rocker_port *rocker_port, __be32 dst, + int dst_len, struct fib_info *fi, u32 tb_id, + int flags) +{ + struct fib_nh *nh; + __be16 eth_type = htons(ETH_P_IP); + __be32 dst_mask = inet_make_mask(dst_len); + __be16 internal_vlan_id = rocker_port->internal_vlan_id; + u32 priority = fi->fib_priority; + enum rocker_of_dpa_table_id goto_tbl = + ROCKER_OF_DPA_TABLE_ID_ACL_POLICY; + u32 group_id; + bool nh_on_port; + bool has_gw; + u32 index; + int err; + + /* XXX support ECMP */ + + nh = fi->fib_nh; + nh_on_port = (fi->fib_dev == rocker_port->dev); + has_gw = !!nh->nh_gw; + + if (has_gw && nh_on_port) { + err = rocker_port_ipv4_nh(rocker_port, flags, + nh->nh_gw, &index); + if (err) + return err; + + group_id = ROCKER_GROUP_L3_UNICAST(index); + } else { + /* Send to CPU for processing */ + group_id = ROCKER_GROUP_L2_INTERFACE(internal_vlan_id, 0); + } + + err = rocker_flow_tbl_ucast4_routing(rocker_port, eth_type, dst, + dst_mask, priority, goto_tbl, + group_id, flags); + if (err) + netdev_err(rocker_port->dev, "Error (%d) IPv4 route %pI4\n", + err, &dst); + + return err; +} + /***************** * Net device ops *****************/ @@ -3830,6 +4147,30 @@ static int rocker_port_switch_port_stp_update(struct net_device *dev, u8 state) return rocker_port_stp_update(rocker_port, state); } +static int rocker_port_switch_fib_ipv4_add(struct net_device *dev, + __be32 dst, int dst_len, + struct fib_info *fi, + u8 tos, u8 type, u32 tb_id) +{ + struct rocker_port *rocker_port = netdev_priv(dev); + int flags = 0; + + return rocker_port_fib_ipv4(rocker_port, dst, dst_len, + fi, tb_id, flags); +} + +static int rocker_port_switch_fib_ipv4_del(struct net_device *dev, + __be32 dst, int dst_len, + struct fib_info *fi, + u8 tos, u8 type, u32 tb_id) +{ 
+ struct rocker_port *rocker_port = netdev_priv(dev); + int flags = ROCKER_OP_FLAG_REMOVE; + + return rocker_port_fib_ipv4(rocker_port, dst, dst_len, + fi, tb_id, flags); +} + static const struct net_device_ops rocker_port_netdev_ops = { .ndo_open = rocker_port_open, .ndo_stop = rocker_port_stop, @@ -3844,6 +4185,8 @@ static const struct net_device_ops rocker_port_netdev_ops = { .ndo_bridge_getlink = rocker_port_bridge_getlink, .ndo_switch_parent_id_get = rocker_port_switch_parent_id_get, .ndo_switch_port_stp_update = rocker_port_switch_port_stp_update, + .ndo_switch_fib_ipv4_add = rocker_port_switch_fib_ipv4_add, + .ndo_switch_fib_ipv4_del = rocker_port_switch_fib_ipv4_del, }; /******************** @@ -4204,8 +4547,9 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number) NAPI_POLL_WEIGHT); rocker_carrier_init(rocker_port); - dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | - NETIF_F_HW_SWITCH_OFFLOAD; + dev->features |= NETIF_F_NETNS_LOCAL | + NETIF_F_HW_VLAN_CTAG_FILTER | + NETIF_F_HW_SWITCH_OFFLOAD; err = register_netdev(dev); if (err) { @@ -4546,6 +4890,48 @@ static struct notifier_block rocker_netdevice_nb __read_mostly = { .notifier_call = rocker_netdevice_event, }; +/************************************ + * Net event notifier event handler + ************************************/ + +static int rocker_neigh_update(struct net_device *dev, struct neighbour *n) +{ + struct rocker_port *rocker_port = netdev_priv(dev); + int flags = (n->nud_state & NUD_VALID) ? 0 : ROCKER_OP_FLAG_REMOVE; + __be32 ip_addr = *(__be32 *)n->primary_key; + + return rocker_port_ipv4_neigh(rocker_port, flags, ip_addr, n->ha); +} + +static int rocker_netevent_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev; + struct neighbour *n = ptr; + int err; + + switch (event) { + case NETEVENT_NEIGH_UPDATE: + if (n->tbl != &arp_tbl) + return NOTIFY_DONE; + dev = n->dev; + if (!rocker_port_dev_check(dev)) + return NOTIFY_DONE; + err = rocker_neigh_update(dev, n); + if (err) + netdev_warn(dev, + "failed to handle neigh update (err %d)\n", + err); + break; + } + + return NOTIFY_DONE; +} + +static struct notifier_block rocker_netevent_nb __read_mostly = { + .notifier_call = rocker_netevent_event, +}; + /*********************** * Module init and exit ***********************/ @@ -4555,18 +4941,21 @@ static int __init rocker_module_init(void) int err; register_netdevice_notifier(&rocker_netdevice_nb); + register_netevent_notifier(&rocker_netevent_nb); err = pci_register_driver(&rocker_pci_driver); if (err) goto err_pci_register_driver; return 0; err_pci_register_driver: + unregister_netdevice_notifier(&rocker_netevent_nb); unregister_netdevice_notifier(&rocker_netdevice_nb); return err; } static void __exit rocker_module_exit(void) { + unregister_netevent_notifier(&rocker_netevent_nb); unregister_netdevice_notifier(&rocker_netdevice_nb); pci_unregister_driver(&rocker_pci_driver); } -- cgit v1.2.3
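Taken together, the series defines a small driver-facing contract, with rocker above as the reference implementation. Below is a minimal sketch of what a hypothetical switchdev driver would expose so that netdev_switch_get_lowest_dev() treats its ports as switch ports and routes over them become candidates for offload; all my_sw_* names are invented for illustration, only the feature flag and ndo names come from the series.

static const struct net_device_ops my_sw_port_netdev_ops = {
	/* netdev_switch_get_lowest_dev() only selects a port dev that
	 * advertises NETIF_F_HW_SWITCH_OFFLOAD and implements
	 * ndo_switch_parent_id_get; all nexthop ports of a route must
	 * report the same parent ID.
	 */
	.ndo_switch_parent_id_get	= my_sw_port_parent_id_get,	/* hypothetical */
	/* The two ops added by this series; add is also used for modify. */
	.ndo_switch_fib_ipv4_add	= my_sw_port_fib_ipv4_add,	/* hypothetical */
	.ndo_switch_fib_ipv4_del	= my_sw_port_fib_ipv4_del,	/* hypothetical */
};

static void my_sw_port_setup(struct net_device *dev)
{
	dev->netdev_ops = &my_sw_port_netdev_ops;
	dev->features |= NETIF_F_HW_SWITCH_OFFLOAD;
}

On success of the add op, the core marks the fib_info with RTNH_F_EXTERNAL; on any failure, netdev_switch_fib_ipv4_abort() flushes the device and disables further IPv4 FIB offload for that net.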