diff mbox

[net-next,v12,5/9] openvswitch: add processing of L3 packets

Message ID a4590d690c169e0021031405463c8629a6e9ecb8.1476708213.git.jbenc@redhat.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Jiri Benc Oct. 17, 2016, 1:02 p.m. UTC
Support receiving, extracting flow key and sending of L3 packets (packets
without an Ethernet header).

Note that even after this patch, non-Ethernet interfaces are still not
allowed to be added to bridges. Similarly, netlink interface for sending and
receiving L3 packets to/from user space is not in place yet.

Based on previous versions by Lorand Jakab and Simon Horman.

Signed-off-by: Lorand Jakab <lojakab@cisco.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Jiri Benc <jbenc@redhat.com>
---
 net/openvswitch/datapath.c |  17 ++------
 net/openvswitch/flow.c     | 101 ++++++++++++++++++++++++++++++++++-----------
 net/openvswitch/vport.c    |  16 +++++++
 3 files changed, 96 insertions(+), 38 deletions(-)

Comments

Pravin Shelar Oct. 19, 2016, 5:13 a.m. UTC | #1
On Mon, Oct 17, 2016 at 6:02 AM, Jiri Benc <jbenc@redhat.com> wrote:
> Support receiving, extracting flow key and sending of L3 packets (packets
> without an Ethernet header).
>
> Note that even after this patch, non-Ethernet interfaces are still not
> allowed to be added to bridges. Similarly, netlink interface for sending and
> receiving L3 packets to/from user space is not in place yet.
>
> Based on previous versions by Lorand Jakab and Simon Horman.
>
> Signed-off-by: Lorand Jakab <lojakab@cisco.com>
> Signed-off-by: Simon Horman <simon.horman@netronome.com>
> Signed-off-by: Jiri Benc <jbenc@redhat.com>
> ---
>  net/openvswitch/datapath.c |  17 ++------
>  net/openvswitch/flow.c     | 101 ++++++++++++++++++++++++++++++++++-----------
>  net/openvswitch/vport.c    |  16 +++++++
>  3 files changed, 96 insertions(+), 38 deletions(-)
>
...

> @@ -505,28 +511,35 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
>
>         skb_reset_mac_header(skb);
>
> -       /* Link layer.  We are guaranteed to have at least the 14 byte Ethernet
> -        * header in the linear data area.
> -        */
> -       eth = eth_hdr(skb);
> -       ether_addr_copy(key->eth.src, eth->h_source);
> -       ether_addr_copy(key->eth.dst, eth->h_dest);
> +       /* Link layer. */
> +       clear_vlan(key);
> +       if (key->mac_proto == MAC_PROTO_NONE) {
> +               if (unlikely(eth_type_vlan(skb->protocol)))
> +                       return -EINVAL;
>
> -       __skb_pull(skb, 2 * ETH_ALEN);
> -       /* We are going to push all headers that we pull, so no need to
> -        * update skb->csum here.
> -        */
> +               skb_reset_network_header(skb);
> +       } else {
> +               eth = eth_hdr(skb);
> +               ether_addr_copy(key->eth.src, eth->h_source);
> +               ether_addr_copy(key->eth.dst, eth->h_dest);
>
> -       if (unlikely(parse_vlan(skb, key)))
> -               return -ENOMEM;
> +               __skb_pull(skb, 2 * ETH_ALEN);
> +               /* We are going to push all headers that we pull, so no need to
> +               * update skb->csum here.
> +               */
>
> -       key->eth.type = parse_ethertype(skb);
> -       if (unlikely(key->eth.type == htons(0)))
> -               return -ENOMEM;
> +               if (unlikely(parse_vlan(skb, key)))
> +                       return -ENOMEM;
>
> -       skb_reset_network_header(skb);
> +               skb->protocol = parse_ethertype(skb);

I am not sure about changing skb->protocol here.
By changing this skb loosing information about packet type. Therefore
if packet re-enters OVS (through different bridge), this packet would
look like L3 packet. function key_extract_mac_proto() would not see
TEB type packet.

> +               if (unlikely(skb->protocol == htons(0)))
> +                       return -ENOMEM;
> +
> +               skb_reset_network_header(skb);
> +               __skb_push(skb, skb->data - skb_mac_header(skb));
> +       }
>         skb_reset_mac_len(skb);
> -       __skb_push(skb, skb->data - skb_mac_header(skb));
> +       key->eth.type = skb->protocol;
>
>         /* Network layer. */
>         if (key->eth.type == htons(ETH_P_IP)) {
> @@ -721,6 +734,20 @@ int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key)
>         return key_extract(skb, key);
>  }
>
> +static u8 key_extract_mac_proto(struct sk_buff *skb)
> +{
> +       switch (skb->dev->type) {
> +       case ARPHRD_ETHER:
> +               return MAC_PROTO_ETHERNET;
> +       case ARPHRD_NONE:
> +               if (skb->protocol == htons(ETH_P_TEB))
> +                       return MAC_PROTO_ETHERNET;
> +               return MAC_PROTO_NONE;
> +       }
> +       WARN_ON_ONCE(1);
> +       return MAC_PROTO_ETHERNET;
> +}
> +
Jiri Benc Oct. 19, 2016, 4:52 p.m. UTC | #2
On Tue, 18 Oct 2016 22:13:45 -0700, Pravin Shelar wrote:
> On Mon, Oct 17, 2016 at 6:02 AM, Jiri Benc <jbenc@redhat.com> wrote:
> > -       skb_reset_network_header(skb);
> > +               skb->protocol = parse_ethertype(skb);
> 
> I am not sure about changing skb->protocol here.
> By changing this skb loosing information about packet type. Therefore
> if packet re-enters OVS (through different bridge), this packet would
> look like L3 packet. function key_extract_mac_proto() would not see
> TEB type packet.

This should be okay. If the packet is sent out to an Ethernet interface
(whatever interface it is), skb->protocol needs to contain the payload
type. We're not interested in ETH_P_TEB. If the packet is sent out to
an ARPHRD_NONE interface, ETH_P_TEB is pushed back.

Basically, what we're doing here is unconditionally converting
ETH_P_TEB packets *coming from ARPHRD_NONE interfaces* (this is
important) into regular Ethernet packets. Which is exactly what we want.

Am I missing something?

 Jiri
Pravin Shelar Oct. 20, 2016, 6:48 p.m. UTC | #3
On Wed, Oct 19, 2016 at 9:52 AM, Jiri Benc <jbenc@redhat.com> wrote:
> On Tue, 18 Oct 2016 22:13:45 -0700, Pravin Shelar wrote:
>> On Mon, Oct 17, 2016 at 6:02 AM, Jiri Benc <jbenc@redhat.com> wrote:
>> > -       skb_reset_network_header(skb);
>> > +               skb->protocol = parse_ethertype(skb);
>>
>> I am not sure about changing skb->protocol here.
>> By changing this skb loosing information about packet type. Therefore
>> if packet re-enters OVS (through different bridge), this packet would
>> look like L3 packet. function key_extract_mac_proto() would not see
>> TEB type packet.
>
> This should be okay. If the packet is sent out to an Ethernet interface
> (whatever interface it is), skb->protocol needs to contain the payload
> type. We're not interested in ETH_P_TEB. If the packet is sent out to
> an ARPHRD_NONE interface, ETH_P_TEB is pushed back.
>
I see, vport send is restoring the skb protocol field. It should be fine then.

> Basically, what we're doing here is unconditionally converting
> ETH_P_TEB packets *coming from ARPHRD_NONE interfaces* (this is
> important) into regular Ethernet packets. Which is exactly what we want.
>
> Am I missing something?
>
>  Jiri
Pravin Shelar Oct. 21, 2016, 4:19 a.m. UTC | #4
On Mon, Oct 17, 2016 at 6:02 AM, Jiri Benc <jbenc@redhat.com> wrote:
> Support receiving, extracting flow key and sending of L3 packets (packets
> without an Ethernet header).
>
> Note that even after this patch, non-Ethernet interfaces are still not
> allowed to be added to bridges. Similarly, netlink interface for sending and
> receiving L3 packets to/from user space is not in place yet.
>
> Based on previous versions by Lorand Jakab and Simon Horman.
>
> Signed-off-by: Lorand Jakab <lojakab@cisco.com>
> Signed-off-by: Simon Horman <simon.horman@netronome.com>
> Signed-off-by: Jiri Benc <jbenc@redhat.com>
> ---
>  net/openvswitch/datapath.c |  17 ++------
>  net/openvswitch/flow.c     | 101 ++++++++++++++++++++++++++++++++++-----------
>  net/openvswitch/vport.c    |  16 +++++++
>  3 files changed, 96 insertions(+), 38 deletions(-)
>
> diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
> index 4d67ea856067..c5b719fca8d4 100644
> --- a/net/openvswitch/datapath.c
> +++ b/net/openvswitch/datapath.c
...
...
> @@ -609,8 +597,10 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
>
>         err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY],
>                                              packet, &flow->key, log);
> -       if (err)
> +       if (err) {
> +               packet = NULL;
>                 goto err_flow_free;
> +       }
>
Why packet is set to NULL? This would leak skb memory in case of error.

>         err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS],
>                                    &flow->key, &acts, log);
...
> diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
> index 96c8c4716603..7aee6e273765 100644
> --- a/net/openvswitch/flow.c
> +++ b/net/openvswitch/flow.c
...

> @@ -721,6 +734,20 @@ int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key)
>         return key_extract(skb, key);
>  }
>
> +static u8 key_extract_mac_proto(struct sk_buff *skb)
> +{
> +       switch (skb->dev->type) {
> +       case ARPHRD_ETHER:
> +               return MAC_PROTO_ETHERNET;
> +       case ARPHRD_NONE:
> +               if (skb->protocol == htons(ETH_P_TEB))
> +                       return MAC_PROTO_ETHERNET;
> +               return MAC_PROTO_NONE;
> +       }
> +       WARN_ON_ONCE(1);
> +       return MAC_PROTO_ETHERNET;
This packet should be dropped.

> +}
> +
>  int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
>                          struct sk_buff *skb, struct sw_flow_key *key)
>  {
....

> diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
> index dc0e2212edfc..8361b62a47c2 100644
> --- a/net/openvswitch/vport.c
> +++ b/net/openvswitch/vport.c
> @@ -502,6 +502,22 @@ void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto)
>  {
>         int mtu = vport->dev->mtu;
>
> +       switch (vport->dev->type) {
> +       case ARPHRD_NONE:
> +               if (mac_proto != MAC_PROTO_NONE) {
> +                       WARN_ON_ONCE(mac_proto != MAC_PROTO_ETHERNET);
> +
It would be easy to read if you check mac_proto for MAC_PROTO_ETHERNET
and then warn otherwise.
another issue is the packet is not dropped if mac_proto is not
MAC_PROTO_ETHERNET

> +                       skb_reset_network_header(skb);
> +                       skb_reset_mac_len(skb);
> +                       skb->protocol = htons(ETH_P_TEB);
> +               }
> +               break;
> +       case ARPHRD_ETHER:
> +               if (mac_proto != MAC_PROTO_ETHERNET)
> +                       goto drop;
> +               break;
> +       }
> +
>         if (unlikely(packet_length(skb, vport->dev) > mtu &&
>                      !skb_is_gso(skb))) {
>                 net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n",
> --
> 1.8.3.1
>
Jiri Benc Oct. 21, 2016, 11:59 a.m. UTC | #5
On Thu, 20 Oct 2016 21:19:14 -0700, Pravin Shelar wrote:
> On Mon, Oct 17, 2016 at 6:02 AM, Jiri Benc <jbenc@redhat.com> wrote:
> > @@ -609,8 +597,10 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
> >
> >         err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY],
> >                                              packet, &flow->key, log);
> > -       if (err)
> > +       if (err) {
> > +               packet = NULL;
> >                 goto err_flow_free;
> > +       }
> >
> Why packet is set to NULL? This would leak skb memory in case of error.

Leftover from the times when this was based on top of the patchset that
unified vlan handling. Thanks for catching it.

> > +static u8 key_extract_mac_proto(struct sk_buff *skb)
> > +{
> > +       switch (skb->dev->type) {
> > +       case ARPHRD_ETHER:
> > +               return MAC_PROTO_ETHERNET;
> > +       case ARPHRD_NONE:
> > +               if (skb->protocol == htons(ETH_P_TEB))
> > +                       return MAC_PROTO_ETHERNET;
> > +               return MAC_PROTO_NONE;
> > +       }
> > +       WARN_ON_ONCE(1);
> > +       return MAC_PROTO_ETHERNET;
> This packet should be dropped.

This should never happen, I put WARN_ON_ONCE there to catch impossible
to happen bugs (and BUG_ON is really too strong here). But sure, why
not, will drop the packet in v13.

> > +       switch (vport->dev->type) {
> > +       case ARPHRD_NONE:
> > +               if (mac_proto != MAC_PROTO_NONE) {
> > +                       WARN_ON_ONCE(mac_proto != MAC_PROTO_ETHERNET);
> > +
> It would be easy to read if you check mac_proto for MAC_PROTO_ETHERNET
> and then warn otherwise.

Okay.

> another issue is the packet is not dropped if mac_proto is not
> MAC_PROTO_ETHERNET

Likewise, impossible to happen, but will drop the packet.

Thanks,

 Jiri
diff mbox

Patch

diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 4d67ea856067..c5b719fca8d4 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -562,7 +562,6 @@  static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 	struct sw_flow *flow;
 	struct sw_flow_actions *sf_acts;
 	struct datapath *dp;
-	struct ethhdr *eth;
 	struct vport *input_vport;
 	u16 mru = 0;
 	int len;
@@ -583,17 +582,6 @@  static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 
 	nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);
 
-	skb_reset_mac_header(packet);
-	eth = eth_hdr(packet);
-
-	/* Normally, setting the skb 'protocol' field would be handled by a
-	 * call to eth_type_trans(), but it assumes there's a sending
-	 * device, which we may not have. */
-	if (eth_proto_is_802_3(eth->h_proto))
-		packet->protocol = eth->h_proto;
-	else
-		packet->protocol = htons(ETH_P_802_2);
-
 	/* Set packet's mru */
 	if (a[OVS_PACKET_ATTR_MRU]) {
 		mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
@@ -609,8 +597,10 @@  static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 
 	err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY],
 					     packet, &flow->key, log);
-	if (err)
+	if (err) {
+		packet = NULL;
 		goto err_flow_free;
+	}
 
 	err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS],
 				   &flow->key, &acts, log);
@@ -620,6 +610,7 @@  static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 	rcu_assign_pointer(flow->sf_acts, acts);
 	packet->priority = flow->key.phy.priority;
 	packet->mark = flow->key.phy.skb_mark;
+	packet->protocol = flow->key.eth.type;
 
 	rcu_read_lock();
 	dp = get_dp_rcu(net, ovs_header->dp_ifindex);
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 96c8c4716603..7aee6e273765 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -334,14 +334,17 @@  static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh)
 	return 1;
 }
 
-static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
+static void clear_vlan(struct sw_flow_key *key)
 {
-	int res;
-
 	key->eth.vlan.tci = 0;
 	key->eth.vlan.tpid = 0;
 	key->eth.cvlan.tci = 0;
 	key->eth.cvlan.tpid = 0;
+}
+
+static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
+{
+	int res;
 
 	if (skb_vlan_tag_present(skb)) {
 		key->eth.vlan.tci = htons(skb->vlan_tci);
@@ -483,17 +486,20 @@  static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
  *
  * Returns 0 if successful, otherwise a negative errno value.
  *
- * Initializes @skb header pointers as follows:
+ * Initializes @skb header fields as follows:
  *
- *    - skb->mac_header: the Ethernet header.
+ *    - skb->mac_header: the L2 header.
  *
- *    - skb->network_header: just past the Ethernet header, or just past the
- *      VLAN header, to the first byte of the Ethernet payload.
+ *    - skb->network_header: just past the L2 header, or just past the
+ *      VLAN header, to the first byte of the L2 payload.
  *
  *    - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6
  *      on output, then just past the IP header, if one is present and
  *      of a correct length, otherwise the same as skb->network_header.
  *      For other key->eth.type values it is left untouched.
+ *
+ *    - skb->protocol: the type of the data starting at skb->network_header.
+ *      Equals to key->eth.type.
  */
 static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
 {
@@ -505,28 +511,35 @@  static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
 
 	skb_reset_mac_header(skb);
 
-	/* Link layer.  We are guaranteed to have at least the 14 byte Ethernet
-	 * header in the linear data area.
-	 */
-	eth = eth_hdr(skb);
-	ether_addr_copy(key->eth.src, eth->h_source);
-	ether_addr_copy(key->eth.dst, eth->h_dest);
+	/* Link layer. */
+	clear_vlan(key);
+	if (key->mac_proto == MAC_PROTO_NONE) {
+		if (unlikely(eth_type_vlan(skb->protocol)))
+			return -EINVAL;
 
-	__skb_pull(skb, 2 * ETH_ALEN);
-	/* We are going to push all headers that we pull, so no need to
-	 * update skb->csum here.
-	 */
+		skb_reset_network_header(skb);
+	} else {
+		eth = eth_hdr(skb);
+		ether_addr_copy(key->eth.src, eth->h_source);
+		ether_addr_copy(key->eth.dst, eth->h_dest);
 
-	if (unlikely(parse_vlan(skb, key)))
-		return -ENOMEM;
+		__skb_pull(skb, 2 * ETH_ALEN);
+		/* We are going to push all headers that we pull, so no need to
+		* update skb->csum here.
+		*/
 
-	key->eth.type = parse_ethertype(skb);
-	if (unlikely(key->eth.type == htons(0)))
-		return -ENOMEM;
+		if (unlikely(parse_vlan(skb, key)))
+			return -ENOMEM;
 
-	skb_reset_network_header(skb);
+		skb->protocol = parse_ethertype(skb);
+		if (unlikely(skb->protocol == htons(0)))
+			return -ENOMEM;
+
+		skb_reset_network_header(skb);
+		__skb_push(skb, skb->data - skb_mac_header(skb));
+	}
 	skb_reset_mac_len(skb);
-	__skb_push(skb, skb->data - skb_mac_header(skb));
+	key->eth.type = skb->protocol;
 
 	/* Network layer. */
 	if (key->eth.type == htons(ETH_P_IP)) {
@@ -721,6 +734,20 @@  int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key)
 	return key_extract(skb, key);
 }
 
+static u8 key_extract_mac_proto(struct sk_buff *skb)
+{
+	switch (skb->dev->type) {
+	case ARPHRD_ETHER:
+		return MAC_PROTO_ETHERNET;
+	case ARPHRD_NONE:
+		if (skb->protocol == htons(ETH_P_TEB))
+			return MAC_PROTO_ETHERNET;
+		return MAC_PROTO_NONE;
+	}
+	WARN_ON_ONCE(1);
+	return MAC_PROTO_ETHERNET;
+}
+
 int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
 			 struct sk_buff *skb, struct sw_flow_key *key)
 {
@@ -751,7 +778,7 @@  int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
 	key->phy.skb_mark = skb->mark;
 	ovs_ct_fill_key(skb, key);
 	key->ovs_flow_hash = 0;
-	key->mac_proto = MAC_PROTO_ETHERNET;
+	key->mac_proto = key_extract_mac_proto(skb);
 	key->recirc_id = 0;
 
 	return key_extract(skb, key);
@@ -768,5 +795,29 @@  int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr,
 	if (err)
 		return err;
 
+	if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) {
+		/* key_extract assumes that skb->protocol is set-up for
+		 * layer 3 packets which is the case for other callers,
+		 * in particular packets recieved from the network stack.
+		 * Here the correct value can be set from the metadata
+		 * extracted above.
+		 */
+		skb->protocol = key->eth.type;
+	} else {
+		struct ethhdr *eth;
+
+		skb_reset_mac_header(skb);
+		eth = eth_hdr(skb);
+
+		/* Normally, setting the skb 'protocol' field would be
+		 * handled by a call to eth_type_trans(), but it assumes
+		 * there's a sending device, which we may not have.
+		 */
+		if (eth_proto_is_802_3(eth->h_proto))
+			skb->protocol = eth->h_proto;
+		else
+			skb->protocol = htons(ETH_P_802_2);
+	}
+
 	return key_extract(skb, key);
 }
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index dc0e2212edfc..8361b62a47c2 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -502,6 +502,22 @@  void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto)
 {
 	int mtu = vport->dev->mtu;
 
+	switch (vport->dev->type) {
+	case ARPHRD_NONE:
+		if (mac_proto != MAC_PROTO_NONE) {
+			WARN_ON_ONCE(mac_proto != MAC_PROTO_ETHERNET);
+
+			skb_reset_network_header(skb);
+			skb_reset_mac_len(skb);
+			skb->protocol = htons(ETH_P_TEB);
+		}
+		break;
+	case ARPHRD_ETHER:
+		if (mac_proto != MAC_PROTO_ETHERNET)
+			goto drop;
+		break;
+	}
+
 	if (unlikely(packet_length(skb, vport->dev) > mtu &&
 		     !skb_is_gso(skb))) {
 		net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n",