summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/net/ip6_route.h6
-rw-r--r--net/ipv6/ip6_fib.c157
-rw-r--r--net/ipv6/route.c3
3 files changed, 73 insertions, 93 deletions
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 4cf1ef935ed9..9e4d0f0aeb6d 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -66,12 +66,6 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr)
(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
}
-static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i)
-{
- return (f6i->fib6_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
- RTF_GATEWAY;
-}
-
void ip6_route_input(struct sk_buff *skb);
struct dst_entry *ip6_route_input_lookup(struct net *net,
struct net_device *dev,
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index d1dc6017f5a6..f9132a6de917 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -934,19 +934,19 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
{
struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
lockdep_is_held(&rt->fib6_table->tb6_lock));
- struct fib6_info *iter = NULL;
+ struct fib6_info *iter = NULL, *match = NULL;
struct fib6_info __rcu **ins;
- struct fib6_info __rcu **fallback_ins = NULL;
int replace = (info->nlh &&
(info->nlh->nlmsg_flags & NLM_F_REPLACE));
+ int append = (info->nlh &&
+ (info->nlh->nlmsg_flags & NLM_F_APPEND));
int add = (!info->nlh ||
(info->nlh->nlmsg_flags & NLM_F_CREATE));
int found = 0;
- bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
u16 nlflags = NLM_F_EXCL;
int err;
- if (info->nlh && (info->nlh->nlmsg_flags & NLM_F_APPEND))
+ if (append)
nlflags |= NLM_F_APPEND;
ins = &fn->leaf;
@@ -968,13 +968,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
nlflags &= ~NLM_F_EXCL;
if (replace) {
- if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) {
- found++;
- break;
- }
- if (rt_can_ecmp)
- fallback_ins = fallback_ins ?: ins;
- goto next_iter;
+ found++;
+ break;
}
if (rt6_duplicate_nexthop(iter, rt)) {
@@ -989,86 +984,67 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
fib6_metric_set(iter, RTAX_MTU, rt->fib6_pmtu);
return -EEXIST;
}
- /* If we have the same destination and the same metric,
- * but not the same gateway, then the route we try to
- * add is sibling to this route, increment our counter
- * of siblings, and later we will add our route to the
- * list.
- * Only static routes (which don't have flag
- * RTF_EXPIRES) are used for ECMPv6.
- *
- * To avoid long list, we only had siblings if the
- * route have a gateway.
- */
- if (rt_can_ecmp &&
- rt6_qualify_for_ecmp(iter))
- rt->fib6_nsiblings++;
+
+ /* first route that matches */
+ if (!match)
+ match = iter;
}
if (iter->fib6_metric > rt->fib6_metric)
break;
-next_iter:
ins = &iter->fib6_next;
}
- if (fallback_ins && !found) {
- /* No ECMP-able route found, replace first non-ECMP one */
- ins = fallback_ins;
- iter = rcu_dereference_protected(*ins,
- lockdep_is_held(&rt->fib6_table->tb6_lock));
- found++;
- }
-
/* Reset round-robin state, if necessary */
if (ins == &fn->leaf)
fn->rr_ptr = NULL;
/* Link this route to others same route. */
- if (rt->fib6_nsiblings) {
- unsigned int fib6_nsiblings;
+ if (append && match) {
struct fib6_info *sibling, *temp_sibling;
- /* Find the first route that have the same metric */
- sibling = leaf;
- while (sibling) {
- if (sibling->fib6_metric == rt->fib6_metric &&
- rt6_qualify_for_ecmp(sibling)) {
- list_add_tail(&rt->fib6_siblings,
- &sibling->fib6_siblings);
- break;
- }
- sibling = rcu_dereference_protected(sibling->fib6_next,
- lockdep_is_held(&rt->fib6_table->tb6_lock));
+ if (rt->fib6_flags & RTF_REJECT) {
+ NL_SET_ERR_MSG(extack,
+ "Can not append a REJECT route");
+ return -EINVAL;
+ } else if (match->fib6_flags & RTF_REJECT) {
+ NL_SET_ERR_MSG(extack,
+ "Can not append to a REJECT route");
+ return -EINVAL;
}
+ rt->fib6_nsiblings = match->fib6_nsiblings;
+ list_add_tail(&rt->fib6_siblings, &match->fib6_siblings);
+ match->fib6_nsiblings++;
+
/* For each sibling in the list, increment the counter of
* siblings. BUG() if counters does not match, list of siblings
* is broken!
*/
- fib6_nsiblings = 0;
list_for_each_entry_safe(sibling, temp_sibling,
- &rt->fib6_siblings, fib6_siblings) {
+ &match->fib6_siblings, fib6_siblings) {
sibling->fib6_nsiblings++;
- BUG_ON(sibling->fib6_nsiblings != rt->fib6_nsiblings);
- fib6_nsiblings++;
+ BUG_ON(sibling->fib6_nsiblings != match->fib6_nsiblings);
}
- BUG_ON(fib6_nsiblings != rt->fib6_nsiblings);
- rt6_multipath_rebalance(temp_sibling);
+
+ rt6_multipath_rebalance(match);
}
/*
* insert node
*/
if (!replace) {
+ enum fib_event_type event;
+
if (!add)
pr_warn("NLM_F_CREATE should be set when creating new route\n");
add:
nlflags |= NLM_F_CREATE;
- err = call_fib6_entry_notifiers(info->nl_net,
- FIB_EVENT_ENTRY_ADD,
- rt, extack);
+ event = append ? FIB_EVENT_ENTRY_APPEND : FIB_EVENT_ENTRY_ADD;
+ err = call_fib6_entry_notifiers(info->nl_net, event, rt,
+ extack);
if (err)
return err;
@@ -1086,7 +1062,7 @@ add:
}
} else {
- int nsiblings;
+ struct fib6_info *tmp;
if (!found) {
if (add)
@@ -1101,48 +1077,57 @@ add:
if (err)
return err;
+ /* if route being replaced has siblings, set tmp to
+ * last one, otherwise tmp is current route. this is
+ * used to set fib6_next for new route
+ */
+ if (iter->fib6_nsiblings)
+ tmp = list_last_entry(&iter->fib6_siblings,
+ struct fib6_info,
+ fib6_siblings);
+ else
+ tmp = iter;
+
+ /* insert new route */
atomic_inc(&rt->fib6_ref);
rcu_assign_pointer(rt->fib6_node, fn);
- rt->fib6_next = iter->fib6_next;
+ rt->fib6_next = tmp->fib6_next;
rcu_assign_pointer(*ins, rt);
+
if (!info->skip_notify)
inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
if (!(fn->fn_flags & RTN_RTINFO)) {
info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
fn->fn_flags |= RTN_RTINFO;
}
- nsiblings = iter->fib6_nsiblings;
- iter->fib6_node = NULL;
- fib6_purge_rt(iter, fn, info->nl_net);
- if (rcu_access_pointer(fn->rr_ptr) == iter)
- fn->rr_ptr = NULL;
- fib6_info_release(iter);
- if (nsiblings) {
+ /* delete old route */
+ rt = iter;
+
+ if (rt->fib6_nsiblings) {
+ struct fib6_info *tmp;
+
/* Replacing an ECMP route, remove all siblings */
- ins = &rt->fib6_next;
- iter = rcu_dereference_protected(*ins,
- lockdep_is_held(&rt->fib6_table->tb6_lock));
- while (iter) {
- if (iter->fib6_metric > rt->fib6_metric)
- break;
- if (rt6_qualify_for_ecmp(iter)) {
- *ins = iter->fib6_next;
- iter->fib6_node = NULL;
- fib6_purge_rt(iter, fn, info->nl_net);
- if (rcu_access_pointer(fn->rr_ptr) == iter)
- fn->rr_ptr = NULL;
- fib6_info_release(iter);
- nsiblings--;
- info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
- } else {
- ins = &iter->fib6_next;
- }
- iter = rcu_dereference_protected(*ins,
- lockdep_is_held(&rt->fib6_table->tb6_lock));
+ list_for_each_entry_safe(iter, tmp, &rt->fib6_siblings,
+ fib6_siblings) {
+ iter->fib6_node = NULL;
+ fib6_purge_rt(iter, fn, info->nl_net);
+ if (rcu_access_pointer(fn->rr_ptr) == iter)
+ fn->rr_ptr = NULL;
+ fib6_info_release(iter);
+
+ rt->fib6_nsiblings--;
+ info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
}
- WARN_ON(nsiblings != 0);
}
+
+ WARN_ON(rt->fib6_nsiblings != 0);
+
+ rt->fib6_node = NULL;
+ fib6_purge_rt(rt, fn, info->nl_net);
+ if (rcu_access_pointer(fn->rr_ptr) == rt)
+ fn->rr_ptr = NULL;
+ fib6_info_release(rt);
}
return 0;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index cc24ed3bc334..bcb8785c0451 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3791,7 +3791,7 @@ static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
lockdep_is_held(&rt->fib6_table->tb6_lock));
while (iter) {
if (iter->fib6_metric == rt->fib6_metric &&
- rt6_qualify_for_ecmp(iter))
+ iter->fib6_nsiblings)
return iter;
iter = rcu_dereference_protected(iter->fib6_next,
lockdep_is_held(&rt->fib6_table->tb6_lock));
@@ -4381,6 +4381,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
*/
cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
NLM_F_REPLACE);
+ cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_APPEND;
nhn++;
}