Patchwork [net-next,01/13] ipv6: Rename fib6_multipath_select and pass fib6_result

login
register
mail settings
Submitter dsahern@kernel.org
Date April 16, 2019, 12:56 a.m.
Message ID <20190416005652.29286-2-dsahern@kernel.org>
Download mbox | patch
Permalink /patch/773759/
State New
Headers show

Comments

dsahern@kernel.org - April 16, 2019, 12:56 a.m.
From: David Ahern <dsahern@gmail.com>

Add 'struct fib6_result' to hold the fib entry and fib6_nh from a fib
lookup as separate entries, similar to what IPv4 now has with fib_result.

Rename fib6_multipath_select to fib6_select_path, pass fib6_result to
it, and set f6i and nh in the result once a path selection is done.
Call fib6_select_path unconditionally for path selection which means
moving the sibling and oif check to fib6_select_path. To handle the two
kinds of callers (two call multipath_select only if flowi6_oif == 0, and
the third always calls it), add a new have_oif_match argument that, when
set, skips the sibling walk.

Update callers of fib6_multipath_select accordingly and have them use the
fib6_info and fib6_nh from the result.

This is needed for multipath nexthop objects where a single f6i can
point to multiple fib6_nh (similar to IPv4).

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/ip6_fib.h    | 13 ++++++----
 include/net/ipv6_stubs.h |  9 +++----
 net/core/filter.c        | 34 +++++++++++++-------------
 net/ipv6/addrconf_core.c | 11 ++++-----
 net/ipv6/af_inet6.c      |  2 +-
 net/ipv6/route.c         | 63 +++++++++++++++++++++++++-----------------------
 6 files changed, 68 insertions(+), 64 deletions(-)

Patch

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 2e9235adfa0d..c4d818041663 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -190,6 +190,11 @@  struct rt6_info {
 	unsigned short			rt6i_nfheader_len;
 };
 
+struct fib6_result {
+	struct fib6_nh		*nh;
+	struct fib6_info	*f6i;
+};
+
 #define for_each_fib6_node_rt_rcu(fn)					\
 	for (rt = rcu_dereference((fn)->leaf); rt;			\
 	     rt = rcu_dereference(rt->fib6_next))
@@ -391,11 +396,9 @@  struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
 struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
 				    int oif, struct flowi6 *fl6, int strict);
 
-struct fib6_info *fib6_multipath_select(const struct net *net,
-					struct fib6_info *match,
-					struct flowi6 *fl6, int oif,
-					const struct sk_buff *skb, int strict);
-
+void fib6_select_path(const struct net *net, struct fib6_result *res,
+		      struct flowi6 *fl6, int oif, bool have_oif_match,
+		      const struct sk_buff *skb, int strict);
 struct fib6_node *fib6_node_lookup(struct fib6_node *root,
 				   const struct in6_addr *daddr,
 				   const struct in6_addr *saddr);
diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index 453b55bf6723..5df36d6a2613 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -14,6 +14,7 @@ 
 struct fib6_info;
 struct fib6_nh;
 struct fib6_config;
+struct fib6_result;
 
 /* This is ugly, ideally these symbols should be built
  * into the core kernel.
@@ -34,11 +35,9 @@  struct ipv6_stub {
 					      struct fib6_table *table,
 					      int oif, struct flowi6 *fl6,
 					      int flags);
-	struct fib6_info *(*fib6_multipath_select)(const struct net *net,
-						   struct fib6_info *f6i,
-						   struct flowi6 *fl6, int oif,
-						   const struct sk_buff *skb,
-						   int strict);
+	void (*fib6_select_path)(const struct net *net, struct fib6_result *res,
+				 struct flowi6 *fl6, int oif, bool oif_match,
+				 const struct sk_buff *skb, int strict);
 	u32 (*ip6_mtu_from_fib6)(struct fib6_info *f6i, struct in6_addr *daddr,
 				 struct in6_addr *saddr);
 
diff --git a/net/core/filter.c b/net/core/filter.c
index 95a27fdf9a40..72b29e4d705b 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4677,9 +4677,9 @@  static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 	struct in6_addr *src = (struct in6_addr *) params->ipv6_src;
 	struct in6_addr *dst = (struct in6_addr *) params->ipv6_dst;
 	struct neighbour *neigh;
+	struct fib6_result res;
 	struct net_device *dev;
 	struct inet6_dev *idev;
-	struct fib6_info *f6i;
 	struct flowi6 fl6;
 	int strict = 0;
 	int oif;
@@ -4724,21 +4724,23 @@  static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 		if (unlikely(!tb))
 			return BPF_FIB_LKUP_RET_NOT_FWDED;
 
-		f6i = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, strict);
+		res.f6i = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6,
+						       strict);
 	} else {
 		fl6.flowi6_mark = 0;
 		fl6.flowi6_secid = 0;
 		fl6.flowi6_tun_key.tun_id = 0;
 		fl6.flowi6_uid = sock_net_uid(net, NULL);
 
-		f6i = ipv6_stub->fib6_lookup(net, oif, &fl6, strict);
+		res.f6i = ipv6_stub->fib6_lookup(net, oif, &fl6, strict);
 	}
 
-	if (unlikely(IS_ERR_OR_NULL(f6i) || f6i == net->ipv6.fib6_null_entry))
+	if (unlikely(IS_ERR_OR_NULL(res.f6i) ||
+		     res.f6i == net->ipv6.fib6_null_entry))
 		return BPF_FIB_LKUP_RET_NOT_FWDED;
 
-	if (unlikely(f6i->fib6_flags & RTF_REJECT)) {
-		switch (f6i->fib6_type) {
+	if (unlikely(res.f6i->fib6_flags & RTF_REJECT)) {
+		switch (res.f6i->fib6_type) {
 		case RTN_BLACKHOLE:
 			return BPF_FIB_LKUP_RET_BLACKHOLE;
 		case RTN_UNREACHABLE:
@@ -4750,28 +4752,26 @@  static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 		}
 	}
 
-	if (f6i->fib6_type != RTN_UNICAST)
+	if (res.f6i->fib6_type != RTN_UNICAST)
 		return BPF_FIB_LKUP_RET_NOT_FWDED;
 
-	if (f6i->fib6_nsiblings && fl6.flowi6_oif == 0)
-		f6i = ipv6_stub->fib6_multipath_select(net, f6i, &fl6,
-						       fl6.flowi6_oif, NULL,
-						       strict);
+	ipv6_stub->fib6_select_path(net, &res, &fl6, fl6.flowi6_oif,
+				    fl6.flowi6_oif != 0, NULL, strict);
 
 	if (check_mtu) {
-		mtu = ipv6_stub->ip6_mtu_from_fib6(f6i, dst, src);
+		mtu = ipv6_stub->ip6_mtu_from_fib6(res.f6i, dst, src);
 		if (params->tot_len > mtu)
 			return BPF_FIB_LKUP_RET_FRAG_NEEDED;
 	}
 
-	if (f6i->fib6_nh.fib_nh_lws)
+	if (res.nh->fib_nh_lws)
 		return BPF_FIB_LKUP_RET_UNSUPP_LWT;
 
-	if (f6i->fib6_nh.fib_nh_gw_family)
-		*dst = f6i->fib6_nh.fib_nh_gw6;
+	if (res.nh->fib_nh_gw_family)
+		*dst = res.nh->fib_nh_gw6;
 
-	dev = f6i->fib6_nh.fib_nh_dev;
-	params->rt_metric = f6i->fib6_metric;
+	dev = res.nh->fib_nh_dev;
+	params->rt_metric = res.f6i->fib6_metric;
 
 	/* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is
 	 * not needed here.
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index e37e4c5871f7..b11fa0aa18a0 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -158,12 +158,11 @@  eafnosupport_fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
 	return NULL;
 }
 
-static struct fib6_info *
-eafnosupport_fib6_multipath_select(const struct net *net, struct fib6_info *f6i,
-				   struct flowi6 *fl6, int oif,
-				   const struct sk_buff *skb, int strict)
+static void
+eafnosupport_fib6_select_path(const struct net *net, struct fib6_result *res,
+			      struct flowi6 *fl6, int oif, bool have_oif_match,
+			      const struct sk_buff *skb, int strict)
 {
-	return f6i;
 }
 
 static u32
@@ -187,7 +186,7 @@  const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
 	.fib6_get_table    = eafnosupport_fib6_get_table,
 	.fib6_table_lookup = eafnosupport_fib6_table_lookup,
 	.fib6_lookup       = eafnosupport_fib6_lookup,
-	.fib6_multipath_select = eafnosupport_fib6_multipath_select,
+	.fib6_select_path  = eafnosupport_fib6_select_path,
 	.ip6_mtu_from_fib6 = eafnosupport_ip6_mtu_from_fib6,
 	.fib6_nh_init	   = eafnosupport_fib6_nh_init,
 };
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 1dac6ea6666a..d8587ca4fbeb 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -917,7 +917,7 @@  static const struct ipv6_stub ipv6_stub_impl = {
 	.fib6_get_table	   = fib6_get_table,
 	.fib6_table_lookup = fib6_table_lookup,
 	.fib6_lookup       = fib6_lookup,
-	.fib6_multipath_select = fib6_multipath_select,
+	.fib6_select_path  = fib6_select_path,
 	.ip6_mtu_from_fib6 = ip6_mtu_from_fib6,
 	.fib6_nh_init	   = fib6_nh_init,
 	.fib6_nh_release   = fib6_nh_release,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a77c004d67fb..dd717d1539b6 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -428,13 +428,15 @@  static bool rt6_check_expired(const struct rt6_info *rt)
 	return false;
 }
 
-struct fib6_info *fib6_multipath_select(const struct net *net,
-					struct fib6_info *match,
-					struct flowi6 *fl6, int oif,
-					const struct sk_buff *skb,
-					int strict)
+void fib6_select_path(const struct net *net, struct fib6_result *res,
+		      struct flowi6 *fl6, int oif, bool have_oif_match,
+		      const struct sk_buff *skb, int strict)
 {
 	struct fib6_info *sibling, *next_sibling;
+	struct fib6_info *match = res->f6i;
+
+	if (!match->fib6_nsiblings || have_oif_match)
+		goto out;
 
 	/* We might have already computed the hash for ICMPv6 errors. In such
 	 * case it will always be non-zero. Otherwise now is the time to do it.
@@ -443,7 +445,7 @@  struct fib6_info *fib6_multipath_select(const struct net *net,
 		fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
 
 	if (fl6->mp_hash <= atomic_read(&match->fib6_nh.fib_nh_upper_bound))
-		return match;
+		goto out;
 
 	list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
 				 fib6_siblings) {
@@ -459,7 +461,9 @@  struct fib6_info *fib6_multipath_select(const struct net *net,
 		break;
 	}
 
-	return match;
+out:
+	res->f6i = match;
+	res->nh = &match->fib6_nh;
 }
 
 /*
@@ -1063,7 +1067,7 @@  static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 					     const struct sk_buff *skb,
 					     int flags)
 {
-	struct fib6_info *f6i;
+	struct fib6_result res = {};
 	struct fib6_node *fn;
 	struct rt6_info *rt;
 
@@ -1073,14 +1077,14 @@  static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 	rcu_read_lock();
 	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 restart:
-	f6i = rcu_dereference(fn->leaf);
-	if (!f6i)
-		f6i = net->ipv6.fib6_null_entry;
+	res.f6i = rcu_dereference(fn->leaf);
+	if (!res.f6i)
+		res.f6i = net->ipv6.fib6_null_entry;
 	else
-		f6i = rt6_device_match(net, f6i, &fl6->saddr,
-				      fl6->flowi6_oif, flags);
+		res.f6i = rt6_device_match(net, res.f6i, &fl6->saddr,
+					   fl6->flowi6_oif, flags);
 
-	if (f6i == net->ipv6.fib6_null_entry) {
+	if (res.f6i == net->ipv6.fib6_null_entry) {
 		fn = fib6_backtrack(fn, &fl6->saddr);
 		if (fn)
 			goto restart;
@@ -1090,20 +1094,20 @@  static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 		goto out;
 	}
 
-	if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
-		f6i = fib6_multipath_select(net, f6i, fl6, fl6->flowi6_oif, skb,
-					    flags);
+	fib6_select_path(net, &res, fl6, fl6->flowi6_oif,
+			 fl6->flowi6_oif != 0, skb, flags);
+
 	/* Search through exception table */
-	rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
+	rt = rt6_find_cached_rt(res.f6i, &fl6->daddr, &fl6->saddr);
 	if (rt) {
 		if (ip6_hold_safe(net, &rt))
 			dst_use_noref(&rt->dst, jiffies);
 	} else {
-		rt = ip6_create_rt_rcu(f6i);
+		rt = ip6_create_rt_rcu(res.f6i);
 	}
 
 out:
-	trace_fib6_table_lookup(net, f6i, table, fl6);
+	trace_fib6_table_lookup(net, res.f6i, table, fl6);
 
 	rcu_read_unlock();
 
@@ -1843,7 +1847,7 @@  struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 			       int oif, struct flowi6 *fl6,
 			       const struct sk_buff *skb, int flags)
 {
-	struct fib6_info *f6i;
+	struct fib6_result res = {};
 	struct rt6_info *rt;
 	int strict = 0;
 
@@ -1854,19 +1858,18 @@  struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 
 	rcu_read_lock();
 
-	f6i = fib6_table_lookup(net, table, oif, fl6, strict);
-	if (f6i == net->ipv6.fib6_null_entry) {
+	res.f6i = fib6_table_lookup(net, table, oif, fl6, strict);
+	if (res.f6i == net->ipv6.fib6_null_entry) {
 		rt = net->ipv6.ip6_null_entry;
 		rcu_read_unlock();
 		dst_hold(&rt->dst);
 		return rt;
 	}
 
-	if (f6i->fib6_nsiblings)
-		f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);
+	fib6_select_path(net, &res, fl6, oif, false, skb, strict);
 
 	/*Search through exception table */
-	rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
+	rt = rt6_find_cached_rt(res.f6i, &fl6->daddr, &fl6->saddr);
 	if (rt) {
 		if (ip6_hold_safe(net, &rt))
 			dst_use_noref(&rt->dst, jiffies);
@@ -1874,7 +1877,7 @@  struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 		rcu_read_unlock();
 		return rt;
 	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
-			    !f6i->fib6_nh.fib_nh_gw_family)) {
+			    !res.nh->fib_nh_gw_family)) {
 		/* Create a RTF_CACHE clone which will not be
 		 * owned by the fib6 tree.  It is for the special case where
 		 * the daddr in the skb during the neighbor look-up is different
@@ -1882,7 +1885,7 @@  struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 		 */
 		struct rt6_info *uncached_rt;
 
-		uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
+		uncached_rt = ip6_rt_cache_alloc(res.f6i, &fl6->daddr, NULL);
 
 		rcu_read_unlock();
 
@@ -1904,10 +1907,10 @@  struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 		struct rt6_info *pcpu_rt;
 
 		local_bh_disable();
-		pcpu_rt = rt6_get_pcpu_route(f6i);
+		pcpu_rt = rt6_get_pcpu_route(res.f6i);
 
 		if (!pcpu_rt)
-			pcpu_rt = rt6_make_pcpu_route(net, f6i);
+			pcpu_rt = rt6_make_pcpu_route(net, res.f6i);
 
 		local_bh_enable();
 		rcu_read_unlock();