diff mbox

[v9,net-next,5/7] openvswitch: add layer 3 support to ovs_packet_cmd_execute()

Message ID 1462347393-22354-6-git-send-email-simon.horman@netronome.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Simon Horman May 4, 2016, 7:36 a.m. UTC
From: Lorand Jakab <lojakab@cisco.com>

When user space handles a packet back to the kernel datapath for
execution, it is not accompanied by the full flow key, only packet
metadata.  This doesn't allow detection of packet type (L2/L3).  Add the
OVS_KEY_ATTR_PACKET_ETHERTYPE attribute to the packet metadata,
containing 0 for layer 2 packets and the Ethertype for layer 3 packets.

Signed-off-by: Lorand Jakab <lojakab@cisco.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
---
v9 [Simon Horman]
* Rebase

v8 [Lorand Jakab]

v7 [Lorand Jakab]
* New patch
---
 include/uapi/linux/openvswitch.h |  2 ++
 net/openvswitch/datapath.c       | 13 +------------
 net/openvswitch/flow.c           |  5 ++++-
 net/openvswitch/flow_netlink.c   | 39 +++++++++++++++++++++++++++++++--------
 4 files changed, 38 insertions(+), 21 deletions(-)

Comments

Jiri Benc May 17, 2016, 2:51 p.m. UTC | #1
On Wed,  4 May 2016 16:36:31 +0900, Simon Horman wrote:
> +	/* Packets from user space for execution only have metadata key
> +	 * attributes.  OVS_KEY_ATTR_PACKET_ETHERTYPE is then used to specify
> +	 * the starting layer of the packet.  Packets with Ethernet headers
> +	 * have this attribute set to 0
> +	 */
> +	if (*attrs & (1ULL << OVS_KEY_ATTR_PACKET_ETHERTYPE)) {
> +		__be16 eth_type;
> +
> +		if (is_mask) {
> +			/* Always exact match packet EtherType */
> +			eth_type = htons(0xffff);
> +		} else {
> +			eth_type = nla_get_be16(a[OVS_KEY_ATTR_PACKET_ETHERTYPE]);
> +			is_layer3 = ((eth_type == htons(ETH_P_IP)) ||
> +				     (eth_type == htons(ETH_P_IPV6)));

Unknown types need to be rejected, not treated as layer2, otherwise we
may run into problems later (with combination of this kernel + newer
user space) when we add more types, such as ETH_P_NSH.

 Jiri
Simon Horman May 18, 2016, 2:24 a.m. UTC | #2
On Tue, May 17, 2016 at 04:51:08PM +0200, Jiri Benc wrote:
> On Wed,  4 May 2016 16:36:31 +0900, Simon Horman wrote:
> > +	/* Packets from user space for execution only have metadata key
> > +	 * attributes.  OVS_KEY_ATTR_PACKET_ETHERTYPE is then used to specify
> > +	 * the starting layer of the packet.  Packets with Ethernet headers
> > +	 * have this attribute set to 0
> > +	 */
> > +	if (*attrs & (1ULL << OVS_KEY_ATTR_PACKET_ETHERTYPE)) {
> > +		__be16 eth_type;
> > +
> > +		if (is_mask) {
> > +			/* Always exact match packet EtherType */
> > +			eth_type = htons(0xffff);
> > +		} else {
> > +			eth_type = nla_get_be16(a[OVS_KEY_ATTR_PACKET_ETHERTYPE]);
> > +			is_layer3 = ((eth_type == htons(ETH_P_IP)) ||
> > +				     (eth_type == htons(ETH_P_IPV6)));
> 
> Unknown types need to be rejected, not treated as layer2, otherwise we
> may run into problems later (with combination of this kernel + newer
> user space) when we add more types, such as ETH_P_NSH.

I believe this is fixed by the following patch.

Possibly it makes sense to squash that patch into and earlier patches.
diff mbox

Patch

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 53129c03273c..2bc2c12ced78 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -329,6 +329,8 @@  enum ovs_key_attr {
 	OVS_KEY_ATTR_CT_ZONE,	/* u16 connection tracking zone. */
 	OVS_KEY_ATTR_CT_MARK,	/* u32 connection tracking mark */
 	OVS_KEY_ATTR_CT_LABELS,	/* 16-octet connection tracking label */
+	OVS_KEY_ATTR_PACKET_ETHERTYPE,  /* be16 Ethernet type for packet
+					 * execution */
 
 #ifdef __KERNEL__
 	OVS_KEY_ATTR_TUNNEL_INFO,  /* struct ip_tunnel_info */
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 856bd8dba676..780957e78d2b 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -546,7 +546,6 @@  static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 	struct sw_flow *flow;
 	struct sw_flow_actions *sf_acts;
 	struct datapath *dp;
-	struct ethhdr *eth;
 	struct vport *input_vport;
 	u16 mru = 0;
 	int len;
@@ -567,17 +566,6 @@  static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 
 	nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);
 
-	skb_reset_mac_header(packet);
-	eth = eth_hdr(packet);
-
-	/* Normally, setting the skb 'protocol' field would be handled by a
-	 * call to eth_type_trans(), but it assumes there's a sending
-	 * device, which we may not have. */
-	if (eth_proto_is_802_3(eth->h_proto))
-		packet->protocol = eth->h_proto;
-	else
-		packet->protocol = htons(ETH_P_802_2);
-
 	/* Set packet's mru */
 	if (a[OVS_PACKET_ATTR_MRU]) {
 		mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
@@ -604,6 +592,7 @@  static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 	rcu_assign_pointer(flow->sf_acts, acts);
 	packet->priority = flow->key.phy.priority;
 	packet->mark = flow->key.phy.skb_mark;
+	packet->protocol = flow->key.eth.type;
 
 	rcu_read_lock();
 	dp = get_dp_rcu(net, ovs_header->dp_ifindex);
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 6e174ea5f2bb..d320c2657627 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -471,7 +471,6 @@  static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
 	/* Link layer. */
 	if (key->phy.is_layer3) {
 		key->eth.tci = 0;
-		key->eth.type = skb->protocol;
 	} else {
 		eth = eth_hdr(skb);
 		ether_addr_copy(key->eth.src, eth->h_source);
@@ -693,6 +692,8 @@  static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
 
 int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key)
 {
+	key->eth.type = skb->protocol;
+
 	return key_extract(skb, key);
 }
 
@@ -747,6 +748,8 @@  int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
 
 	if (is_teb)
 		skb->protocol = key->eth.type;
+	else if (is_layer3)
+		key->eth.type = skb->protocol;
 
 	return err;
 }
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index d0ef4fa69bea..2bca1e5e9a18 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -286,7 +286,7 @@  size_t ovs_key_attr_size(void)
 	/* Whenever adding new OVS_KEY_ FIELDS, we should consider
 	 * updating this function.
 	 */
-	BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 26);
+	BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 27);
 
 	return    nla_total_size(4)   /* OVS_KEY_ATTR_PRIORITY */
 		+ nla_total_size(0)   /* OVS_KEY_ATTR_TUNNEL */
@@ -359,6 +359,7 @@  static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
 	[OVS_KEY_ATTR_CT_ZONE]	 = { .len = sizeof(u16) },
 	[OVS_KEY_ATTR_CT_MARK]	 = { .len = sizeof(u32) },
 	[OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) },
+	[OVS_KEY_ATTR_PACKET_ETHERTYPE] = { .len = sizeof(__be16) },
 };
 
 static bool check_attr_len(unsigned int attr_len, unsigned int expected_len)
@@ -816,6 +817,8 @@  static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
 				 u64 *attrs, const struct nlattr **a,
 				 bool is_mask, bool log)
 {
+	bool is_layer3;
+
 	if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) {
 		u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]);
 
@@ -902,15 +905,35 @@  static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
 				   sizeof(*cl), is_mask);
 		*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS);
 	}
-	if (is_mask) {
+
+	/* For full flow keys the layer is determined based on the presence of
+	 * OVS_KEY_ATTR_ETHERNET
+	 */
+	if (is_mask)
 		/* Always exact match is_layer3 */
-		SW_FLOW_KEY_PUT(match, phy.is_layer3, true, is_mask);
-	} else {
-		if (*attrs & (1ULL << OVS_KEY_ATTR_ETHERNET))
-			SW_FLOW_KEY_PUT(match, phy.is_layer3, false, is_mask);
-		else
-			SW_FLOW_KEY_PUT(match, phy.is_layer3, true, is_mask);
+		is_layer3 = true;
+	else
+		is_layer3 =  !(*attrs & (1ULL << OVS_KEY_ATTR_ETHERNET));
+	/* Packets from user space for execution only have metadata key
+	 * attributes.  OVS_KEY_ATTR_PACKET_ETHERTYPE is then used to specify
+	 * the starting layer of the packet.  Packets with Ethernet headers
+	 * have this attribute set to 0
+	 */
+	if (*attrs & (1ULL << OVS_KEY_ATTR_PACKET_ETHERTYPE)) {
+		__be16 eth_type;
+
+		if (is_mask) {
+			/* Always exact match packet EtherType */
+			eth_type = htons(0xffff);
+		} else {
+			eth_type = nla_get_be16(a[OVS_KEY_ATTR_PACKET_ETHERTYPE]);
+			is_layer3 = ((eth_type == htons(ETH_P_IP)) ||
+				     (eth_type == htons(ETH_P_IPV6)));
+		}
+		SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
 	}
+
+	SW_FLOW_KEY_PUT(match, phy.is_layer3, is_layer3, is_mask);
 	return 0;
 }