Patchwork [bpf-next,v9,5/7] bpf: add handling of BPF_LWT_REROUTE to lwt_bpf.c

login
register
mail settings
Submitter Peter Oskolkov
Date Feb. 12, 2019, 12:42 a.m.
Message ID <20190212004249.219268-6-posk@google.com>
Download mbox | patch
Permalink /patch/723435/
State New
Headers show

Comments

Peter Oskolkov - Feb. 12, 2019, 12:42 a.m.
This patch builds on top of the previous patch in the patchset,
which added BPF_LWT_ENCAP_IP mode to bpf_lwt_push_encap. As the
encapping can result in the skb needing to go via a different
interface/route/dst, bpf programs can indicate this by returning
BPF_LWT_REROUTE, which triggers a new route lookup for the skb.

v8 changes: fix kbuild errors when LWTUNNEL_BPF is builtin, but
   IPV6 is a module: as LWTUNNEL_BPF can only be either Y or N,
   call IPV6 routing functions only if they are built-in.

v9 changes:
   - fixed a kbuild test robot compiler warning;
   - call IPV6 routing functions via ipv6_stub.

Signed-off-by: Peter Oskolkov <posk@google.com>
---
 net/core/lwt_bpf.c | 133 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 133 insertions(+)
David Ahern - Feb. 12, 2019, 4:39 a.m.
On 2/11/19 5:42 PM, Peter Oskolkov wrote:
> @@ -88,6 +90,35 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
>  	return ret;
>  }
>  
> +static int bpf_lwt_input_reroute(struct sk_buff *skb)
> +{
> +	int err = -EINVAL;
> +
> +	if (skb->protocol == htons(ETH_P_IP)) {
> +		struct iphdr *iph = ip_hdr(skb);
> +
> +		err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
> +					   iph->tos, skb_dst(skb)->dev);
> +	} else if (skb->protocol == htons(ETH_P_IPV6)) {
> +#if IS_ENABLED(CONFIG_IPV6)
> +		err = ipv6_stub->ipv6_route_input(skb);
> +#else
> +		pr_warn_once("BPF_LWT_REROUTE input: IPV6 not available\n");
> +#endif

The stub defines ipv6_route_input when IPv6 is not enabled.
addrconf_core.o is compiled in if NET is enabled, regardless of
CONFIG_IPV6; i.e., you don't need the IS_ENABLED check here.

If a bpf program pushes a v6 header the stub returns -EAFNOSUPPORT based
on patch 4.


> +	} else {
> +		pr_warn_once("BPF_LWT_REROUTE input: unsupported proto %d\n",
> +			     skb->protocol);

You don't need a warning here; just return -EAFNOSUPPORT.


> +	}
> +
> +	if (err)
> +		goto err;
> +	return dst_input(skb);
> +
> +err:
> +	kfree_skb(skb);
> +	return err;
> +}
> +
>  static int bpf_input(struct sk_buff *skb)
>  {
>  	struct dst_entry *dst = skb_dst(skb);
> @@ -99,6 +130,8 @@ static int bpf_input(struct sk_buff *skb)
>  		ret = run_lwt_bpf(skb, &bpf->in, dst, NO_REDIRECT);
>  		if (ret < 0)
>  			return ret;
> +		if (ret == BPF_LWT_REROUTE)
> +			return bpf_lwt_input_reroute(skb);
>  	}
>  
>  	if (unlikely(!dst->lwtstate->orig_input)) {
> @@ -148,6 +181,95 @@ static int xmit_check_hhlen(struct sk_buff *skb)
>  	return 0;
>  }
>  
> +static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
> +{
> +	struct net_device *l3mdev = l3mdev_master_dev_rcu(skb_dst(skb)->dev);
> +	int oif = l3mdev ? l3mdev->ifindex : 0;
> +	struct dst_entry *dst = NULL;
> +	struct sock *sk;
> +	struct net *net;
> +	bool ipv4;
> +	int err;
> +
> +	if (skb->protocol == htons(ETH_P_IP)) {
> +		ipv4 = true;
> +	} else if (skb->protocol == htons(ETH_P_IPV6)) {
> +		ipv4 = false;
> +	} else {
> +		pr_warn_once("BPF_LWT_REROUTE xmit: unsupported proto %d\n",
> +			     skb->protocol);

Same here - no warning needed.

> +		return -EINVAL;
> +	}
> +
> +	sk = sk_to_full_sk(skb->sk);
> +	if (sk) {
> +		if (sk->sk_bound_dev_if)
> +			oif = sk->sk_bound_dev_if;
> +		net = sock_net(sk);
> +	} else {
> +		net = dev_net(skb_dst(skb)->dev);
> +	}
> +
> +	if (ipv4) {
> +		struct iphdr *iph = ip_hdr(skb);
> +		struct flowi4 fl4 = {};
> +		struct rtable *rt;
> +
> +		fl4.flowi4_oif = oif;
> +		fl4.flowi4_mark = skb->mark;
> +		fl4.flowi4_uid = sock_net_uid(net, sk);
> +		fl4.flowi4_tos = RT_TOS(iph->tos);
> +		fl4.flowi4_flags = FLOWI_FLAG_ANYSRC;
> +		fl4.flowi4_proto = iph->protocol;
> +		fl4.daddr = iph->daddr;
> +		fl4.saddr = iph->saddr;
> +
> +		rt = ip_route_output_key(net, &fl4);
> +		if (IS_ERR(rt) || rt->dst.error)
> +			return -EINVAL;
> +		dst = &rt->dst;
> +	} else {
> +#if IS_ENABLED(CONFIG_IPV6)
> +		struct ipv6hdr *iph6 = ipv6_hdr(skb);
> +		struct flowi6 fl6 = {};
> +
> +		fl6.flowi6_oif = oif;
> +		fl6.flowi6_mark = skb->mark;
> +		fl6.flowi6_uid = sock_net_uid(net, sk);
> +		fl6.flowlabel = ip6_flowinfo(iph6);
> +		fl6.flowi6_proto = iph6->nexthdr;
> +		fl6.daddr = iph6->daddr;
> +		fl6.saddr = iph6->saddr;
> +
> +		err = ipv6_stub->ipv6_dst_lookup(net, skb->sk, &dst, &fl6);
> +		if (err || IS_ERR(dst) || dst->error)
> +			return -EINVAL;
> +#else
> +		pr_warn_once("BPF_LWT_REROUTE xmit: IPV6 not available\n");
> +		return -EINVAL;
> +#endif

No #if .. #endif needed. The stub handles it.

Patch

diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index 079871fc020f..aec5e6df880e 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -17,6 +17,7 @@ 
 #include <linux/bpf.h>
 #include <net/lwtunnel.h>
 #include <net/gre.h>
+#include <net/ip6_route.h>
 
 struct bpf_lwt_prog {
 	struct bpf_prog *prog;
@@ -56,6 +57,7 @@  static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
 
 	switch (ret) {
 	case BPF_OK:
+	case BPF_LWT_REROUTE:
 		break;
 
 	case BPF_REDIRECT:
@@ -88,6 +90,35 @@  static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
 	return ret;
 }
 
+static int bpf_lwt_input_reroute(struct sk_buff *skb)
+{
+	int err = -EINVAL;
+
+	if (skb->protocol == htons(ETH_P_IP)) {
+		struct iphdr *iph = ip_hdr(skb);
+
+		err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
+					   iph->tos, skb_dst(skb)->dev);
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+#if IS_ENABLED(CONFIG_IPV6)
+		err = ipv6_stub->ipv6_route_input(skb);
+#else
+		pr_warn_once("BPF_LWT_REROUTE input: IPV6 not available\n");
+#endif
+	} else {
+		pr_warn_once("BPF_LWT_REROUTE input: unsupported proto %d\n",
+			     skb->protocol);
+	}
+
+	if (err)
+		goto err;
+	return dst_input(skb);
+
+err:
+	kfree_skb(skb);
+	return err;
+}
+
 static int bpf_input(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
@@ -99,6 +130,8 @@  static int bpf_input(struct sk_buff *skb)
 		ret = run_lwt_bpf(skb, &bpf->in, dst, NO_REDIRECT);
 		if (ret < 0)
 			return ret;
+		if (ret == BPF_LWT_REROUTE)
+			return bpf_lwt_input_reroute(skb);
 	}
 
 	if (unlikely(!dst->lwtstate->orig_input)) {
@@ -148,6 +181,95 @@  static int xmit_check_hhlen(struct sk_buff *skb)
 	return 0;
 }
 
+static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
+{
+	struct net_device *l3mdev = l3mdev_master_dev_rcu(skb_dst(skb)->dev);
+	int oif = l3mdev ? l3mdev->ifindex : 0;
+	struct dst_entry *dst = NULL;
+	struct sock *sk;
+	struct net *net;
+	bool ipv4;
+	int err;
+
+	if (skb->protocol == htons(ETH_P_IP)) {
+		ipv4 = true;
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		ipv4 = false;
+	} else {
+		pr_warn_once("BPF_LWT_REROUTE xmit: unsupported proto %d\n",
+			     skb->protocol);
+		return -EINVAL;
+	}
+
+	sk = sk_to_full_sk(skb->sk);
+	if (sk) {
+		if (sk->sk_bound_dev_if)
+			oif = sk->sk_bound_dev_if;
+		net = sock_net(sk);
+	} else {
+		net = dev_net(skb_dst(skb)->dev);
+	}
+
+	if (ipv4) {
+		struct iphdr *iph = ip_hdr(skb);
+		struct flowi4 fl4 = {};
+		struct rtable *rt;
+
+		fl4.flowi4_oif = oif;
+		fl4.flowi4_mark = skb->mark;
+		fl4.flowi4_uid = sock_net_uid(net, sk);
+		fl4.flowi4_tos = RT_TOS(iph->tos);
+		fl4.flowi4_flags = FLOWI_FLAG_ANYSRC;
+		fl4.flowi4_proto = iph->protocol;
+		fl4.daddr = iph->daddr;
+		fl4.saddr = iph->saddr;
+
+		rt = ip_route_output_key(net, &fl4);
+		if (IS_ERR(rt) || rt->dst.error)
+			return -EINVAL;
+		dst = &rt->dst;
+	} else {
+#if IS_ENABLED(CONFIG_IPV6)
+		struct ipv6hdr *iph6 = ipv6_hdr(skb);
+		struct flowi6 fl6 = {};
+
+		fl6.flowi6_oif = oif;
+		fl6.flowi6_mark = skb->mark;
+		fl6.flowi6_uid = sock_net_uid(net, sk);
+		fl6.flowlabel = ip6_flowinfo(iph6);
+		fl6.flowi6_proto = iph6->nexthdr;
+		fl6.daddr = iph6->daddr;
+		fl6.saddr = iph6->saddr;
+
+		err = ipv6_stub->ipv6_dst_lookup(net, skb->sk, &dst, &fl6);
+		if (err || IS_ERR(dst) || dst->error)
+			return -EINVAL;
+#else
+		pr_warn_once("BPF_LWT_REROUTE xmit: IPV6 not available\n");
+		return -EINVAL;
+#endif
+	}
+
+	/* Although skb header was reserved in bpf_lwt_push_ip_encap(), it
+	 * was done for the previous dst, so we are doing it here again, in
+	 * case the new dst needs much more space. The call below is a noop
+	 * if there is enough header space in skb.
+	 */
+	err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+	if (unlikely(err))
+		return err;
+
+	skb_dst_drop(skb);
+	skb_dst_set(skb, dst);
+
+	err = dst_output(dev_net(skb_dst(skb)->dev), skb->sk, skb);
+	if (unlikely(err))
+		return err;
+
+	/* ip[6]_finish_output2 understand LWTUNNEL_XMIT_DONE */
+	return LWTUNNEL_XMIT_DONE;
+}
+
 static int bpf_xmit(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
@@ -155,11 +277,20 @@  static int bpf_xmit(struct sk_buff *skb)
 
 	bpf = bpf_lwt_lwtunnel(dst->lwtstate);
 	if (bpf->xmit.prog) {
+		__be16 proto = skb->protocol;
 		int ret;
 
 		ret = run_lwt_bpf(skb, &bpf->xmit, dst, CAN_REDIRECT);
 		switch (ret) {
 		case BPF_OK:
+			/* If the header changed, e.g. via bpf_lwt_push_encap,
+			 * BPF_LWT_REROUTE below should have been used if the
+			 * protocol was also changed.
+			 */
+			if (skb->protocol != proto) {
+				kfree_skb(skb);
+				return -EINVAL;
+			}
 			/* If the header was expanded, headroom might be too
 			 * small for L2 header to come, expand as needed.
 			 */
@@ -170,6 +301,8 @@  static int bpf_xmit(struct sk_buff *skb)
 			return LWTUNNEL_XMIT_CONTINUE;
 		case BPF_REDIRECT:
 			return LWTUNNEL_XMIT_DONE;
+		case BPF_LWT_REROUTE:
+			return bpf_lwt_xmit_reroute(skb);
 		default:
 			return ret;
 		}