diff mbox

[ovs-dev,v10,4/5] userspace: extend layer 3 support to cover non-IP packets

Message ID 1462347265-21887-5-git-send-email-simon.horman@netronome.com
State Changes Requested
Headers show

Commit Message

Simon Horman May 4, 2016, 7:34 a.m. UTC
Extend support for layer 3 packets to cover non-IP packets.

This removes the assumption that the first octet of a layer 3 packet
indicates the IP protocol version - true for IP (v4 and v6), but not
necessarily for other protocols.

The key motivation for this is to allow forwarding of MPLS packets which
are technically layer 2.5 rather than 3 but the distinction seems unimportant
here.

This relies on datapaths setting OVS_KEY_ATTR_PACKET_ETHERTYPE to
the ethernet type corresponding to the protocol of layer 3 packets
on a flow miss.

Signed-off-by: Simon Horman <simon.horman@netronome.com>

---
v10
* Delete now bogus comment about protocol assumptions for L3 packets

v9
* New patch
---
 include/openvswitch/ofp-print.h |  7 +++++-
 lib/dp-packet.h                 |  2 +-
 lib/dpif-netdev.c               |  5 +---
 lib/dpif.c                      |  9 ++-----
 lib/flow.c                      | 40 +++++++++----------------------
 lib/odp-util.c                  | 53 ++++++++++++++++++++++++++++++-----------
 lib/ofp-print.c                 | 24 +++++++++++++++----
 lib/packets.c                   | 14 ++++++++---
 8 files changed, 90 insertions(+), 64 deletions(-)

Comments

Ben Pfaff May 31, 2016, 8:56 p.m. UTC | #1
On Wed, May 04, 2016 at 04:34:24PM +0900, Simon Horman wrote:
> Extend support for layer 3 packets to cover non-IP packets.
> 
> This removes the assumption that the first octet of a layer 3 packet
> indicates the IP protocol version - true for IP (v4 and v6), but not
> necessarily for other protocols.
> 
> The key motivation for this is to allow forwarding of MPLS packets which
> are technically layer 2.5 rather than 3 but the distinction seems unimportant
> here.
> 
> This relies on datapaths setting OVS_KEY_ATTR_PACKET_ETHERTYPE to
> the ethernet type corresponding to the protocol of layer 3 packets
> on a flow miss.
> 
> Signed-off-by: Simon Horman <simon.horman@netronome.com>

Acked-by: Ben Pfaff <blp@ovn.org>
diff mbox

Patch

diff --git a/include/openvswitch/ofp-print.h b/include/openvswitch/ofp-print.h
index 3e951173acc2..dce80a7cbc88 100644
--- a/include/openvswitch/ofp-print.h
+++ b/include/openvswitch/ofp-print.h
@@ -23,6 +23,8 @@ 
 #include <stdio.h>
 #include <stdbool.h>
 
+#include <openvswitch/types.h>
+
 struct ds;
 struct ofp10_match;
 struct ofp_flow_mod;
@@ -30,6 +32,7 @@  struct ofp_header;
 struct ofputil_flow_stats;
 struct ofputil_table_features;
 struct ofputil_table_stats;
+struct dp_packet;
 
 #ifdef  __cplusplus
 extern "C" {
@@ -42,7 +45,9 @@  void ofp10_match_print(struct ds *, const struct ofp10_match *, int verbosity);
 
 char *ofp_to_string(const void *, size_t, int verbosity);
 char *ofp10_match_to_string(const struct ofp10_match *, int verbosity);
-char *ofp_packet_to_string(const void *data, size_t len, bool is_layer3);
+char *ofp_packet_to_string(const void *data, size_t len,
+			   ovs_be16 packet_ethertype);
+char *ofp_dp_packet_to_string(const struct dp_packet *);
 
 void ofp_print_flow_stats(struct ds *, struct ofputil_flow_stats *);
 void ofp_print_version(const struct ofp_header *, struct ds *);
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index b9b549e1e09f..6d7e33b9c124 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -253,7 +253,7 @@  dp_packet_equal(const struct dp_packet *a, const struct dp_packet *b)
 static inline bool
 dp_packet_is_l3(const struct dp_packet *b)
 {
-    return b->l3_ofs == 0;
+    return b->l3_ofs == 0 || b->l2_5_ofs == 0;
 }
 
 /* Get the start of the Ethernet frame.  Return NULL if 'b' is an l3 packet
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 89e6ef400cae..666a8ed1d109 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -3250,10 +3250,7 @@  dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, struct dp_packet *packet_,
 
         ofpbuf_init(&key, 0);
         odp_flow_key_from_flow(&odp_parms, &key);
-        packet_str = ofp_packet_to_string(dp_packet_data(packet_),
-                                          dp_packet_size(packet_),
-                                          dp_packet_is_l3(packet_));
-
+        packet_str = ofp_dp_packet_to_string(packet_);
         odp_flow_key_format(key.data, key.size, &ds);
 
         VLOG_DBG("%s: %s upcall:\n%s\n%s", dp->name,
diff --git a/lib/dpif.c b/lib/dpif.c
index cf80b6a1f51c..1a34cdeffc66 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -1390,10 +1390,7 @@  dpif_print_packet(struct dpif *dpif, struct dpif_upcall *upcall)
         struct ds flow;
         char *packet;
 
-        packet = ofp_packet_to_string(dp_packet_data(&upcall->packet),
-                                      dp_packet_size(&upcall->packet),
-                                      dp_packet_is_l3(&upcall->packet));
-
+        packet = ofp_dp_packet_to_string(&upcall->packet);
         ds_init(&flow);
         odp_flow_key_format(upcall->key, upcall->key_len, &flow);
 
@@ -1686,9 +1683,7 @@  log_execute_message(struct dpif *dpif, const struct dpif_execute *execute,
         struct ds ds = DS_EMPTY_INITIALIZER;
         char *packet;
 
-        packet = ofp_packet_to_string(dp_packet_data(execute->packet),
-                                      dp_packet_size(execute->packet),
-                                      dp_packet_is_l3(execute->packet));
+        packet = ofp_dp_packet_to_string(execute->packet);
         ds_put_format(&ds, "%s: %sexecute ",
                       dpif_name(dpif),
                       (subexecute ? "sub-"
diff --git a/lib/flow.c b/lib/flow.c
index 3896531c0936..8f3e8200150f 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -439,23 +439,6 @@  invalid:
     arp_buf[1] = eth_addr_zero;
 }
 
-/* Determines IP version if a layer 3 packet */
-static ovs_be16
-get_l3_eth_type(struct dp_packet *packet)
-{
-    struct ip_header *ip = dp_packet_l3(packet);
-    int ip_ver = IP_VER(ip->ip_ihl_ver);
-
-    switch (ip_ver) {
-    case 4:
-        return htons(ETH_TYPE_IP);
-    case 6:
-        return htons(ETH_TYPE_IPV6);
-    default:
-        return 0;
-    }
-}
-
 /* Initializes 'flow' members from 'packet' and 'md'.
  * Expects packet->l3_ofs to be set to 0 for layer 3 packets.
  *
@@ -572,29 +555,28 @@  miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
             miniflow_push_be16(mf, dl_type, dl_type);
             miniflow_push_be16(mf, vlan_tci, vlan_tci);
         }
-
-        /* Parse mpls. */
-        if (OVS_UNLIKELY(eth_type_mpls(dl_type))) {
-            int count;
-            const void *mpls = data;
-
-            packet->l2_5_ofs = (char *)data - frame;
-            count = parse_mpls(&data, &size);
-            miniflow_push_words_32(mf, mpls_lse, mpls, count);
-        }
     } else {
-        /* We assume L3 packets are either IPv4 or IPv6. */
         packet->l3_ofs = 0;
         miniflow_pad_from_64(mf, base_layer);
         miniflow_push_uint8(mf, base_layer, LAYER_3);
         miniflow_pad_to_64(mf, base_layer);
 
-        dl_type = get_l3_eth_type(packet);
+        dl_type = packet->md.packet_ethertype;
         miniflow_pad_from_64(mf, dl_type);
         miniflow_push_be16(mf, dl_type, dl_type);
         miniflow_push_be16(mf, vlan_tci, 0);
     }
 
+    /* Parse mpls. */
+    if (OVS_UNLIKELY(eth_type_mpls(dl_type))) {
+        int count;
+        const void *mpls = data;
+
+        packet->l2_5_ofs = (char *)data - frame;
+        count = parse_mpls(&data, &size);
+        miniflow_push_words_32(mf, mpls_lse, mpls, count);
+    }
+
     /* Network layer. */
     packet->l3_ofs = (char *)data - frame;
 
diff --git a/lib/odp-util.c b/lib/odp-util.c
index 0345963a5f17..38b4520ecfcd 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -4365,6 +4365,8 @@  odp_flow_key_from_flow__(const struct odp_flow_key_parms *parms,
         }
 
         nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, data->dl_type);
+    } else {
+        nl_msg_put_be16(buf, OVS_KEY_ATTR_PACKET_ETHERTYPE, data->dl_type);
     }
 
     if (flow->dl_type == htons(ETH_TYPE_IP)) {
@@ -4611,12 +4613,13 @@  odp_key_to_pkt_metadata(const struct nlattr *key, size_t key_len,
             md->base_layer = LAYER_2;
             wanted_attrs &= ~(1u << OVS_KEY_ATTR_ETHERNET);
             break;
+        case OVS_KEY_ATTR_PACKET_ETHERTYPE:
+            md->packet_ethertype = nl_attr_get_be16(nla);
+            break;
         case OVS_KEY_ATTR_IPV4:
-            md->packet_ethertype = htons(ETH_TYPE_IP);
             wanted_attrs &= ~(1u << OVS_KEY_ATTR_IPV4);
             break;
         case OVS_KEY_ATTR_IPV6:
-            md->packet_ethertype = htons(ETH_TYPE_IPV6);
             wanted_attrs &= ~(1u << OVS_KEY_ATTR_IPV6);
             break;
         default:
@@ -4769,6 +4772,29 @@  check_expectations(uint64_t present_attrs, int out_of_range_attr,
 }
 
 static bool
+parse_ethertype__(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1],
+                  uint64_t *expected_attrs, struct flow *flow,
+                  const struct flow *src_flow, unsigned attr_idx, bool is_mask)
+{
+    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+
+    flow->dl_type = nl_attr_get_be16(attrs[attr_idx]);
+
+    if (!is_mask && ntohs(flow->dl_type) < ETH_TYPE_MIN) {
+        VLOG_ERR_RL(&rl, "invalid Ethertype %"PRIu16" in flow key",
+                    ntohs(flow->dl_type));
+        return false;
+    }
+    if (is_mask && (!src_flow || ntohs(src_flow->dl_type) < ETH_TYPE_MIN) &&
+        flow->dl_type != htons(0xffff)) {
+        return false;
+    }
+    *expected_attrs |= UINT64_C(1) << attr_idx;
+
+    return true;
+}
+
+static bool
 parse_ethertype(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1],
                 uint64_t present_attrs, uint64_t *expected_attrs,
                 struct flow *flow, const struct flow *src_flow)
@@ -4777,17 +4803,11 @@  parse_ethertype(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1],
     bool is_mask = flow != src_flow;
 
     if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_ETHERTYPE)) {
-        flow->dl_type = nl_attr_get_be16(attrs[OVS_KEY_ATTR_ETHERTYPE]);
-        if (!is_mask && ntohs(flow->dl_type) < ETH_TYPE_MIN) {
-            VLOG_ERR_RL(&rl, "invalid Ethertype %"PRIu16" in flow key",
-                        ntohs(flow->dl_type));
-            return false;
-        }
-        if (is_mask && ntohs(src_flow->dl_type) < ETH_TYPE_MIN &&
-            flow->dl_type != htons(0xffff)) {
-            return false;
-        }
-        *expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ETHERTYPE;
+        return parse_ethertype__(attrs, expected_attrs, flow, src_flow,
+                                 OVS_KEY_ATTR_ETHERTYPE, is_mask);
+    } else if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_PACKET_ETHERTYPE)) {
+        return parse_ethertype__(attrs, expected_attrs, flow, src_flow,
+                                 OVS_KEY_ATTR_PACKET_ETHERTYPE, is_mask);
     } else {
         if (!is_mask) {
             if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_IPV4)) {
@@ -4797,6 +4817,8 @@  parse_ethertype(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1],
             } else {
                 flow->dl_type = htons(FLOW_DL_TYPE_NONE);
             }
+        } else if (src_flow->base_layer == LAYER_3) {
+            flow->dl_type = htons(0xffff);
         } else if (ntohs(src_flow->dl_type) < ETH_TYPE_MIN) {
             /* See comments in odp_flow_key_from_flow__(). */
             VLOG_ERR_RL(&rl, "mask expected for non-Ethernet II frame");
@@ -5219,7 +5241,10 @@  odp_flow_key_to_flow__(const struct nlattr *key, size_t key_len,
         put_ethernet_key(eth_key, flow);
         flow->base_layer = LAYER_2;
         expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ETHERNET;
-    } else {
+    } else if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_PACKET_ETHERTYPE)) {
+        flow->base_layer = LAYER_3;
+        expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_PACKET_ETHERTYPE;
+    } else if (is_mask && src_flow->base_layer == LAYER_3) {
         flow->base_layer = LAYER_3;
     }
 
diff --git a/lib/ofp-print.c b/lib/ofp-print.c
index f10e48625b7e..1b99010720f3 100644
--- a/lib/ofp-print.c
+++ b/lib/ofp-print.c
@@ -58,7 +58,7 @@  static void ofp_print_error(struct ds *, enum ofperr);
 /* Returns a string that represents the contents of the packet in the
  * 'len' bytes starting at 'data'.  The caller must free the returned string.*/
 char *
-ofp_packet_to_string(const void *data, size_t len, bool is_layer3)
+ofp_packet_to_string(const void *data, size_t len, ovs_be16 packet_ethertype)
 {
     struct ds ds = DS_EMPTY_INITIALIZER;
     struct dp_packet buf;
@@ -66,7 +66,9 @@  ofp_packet_to_string(const void *data, size_t len, bool is_layer3)
     size_t l4_size;
 
     dp_packet_use_const(&buf, data, len);
-    if (is_layer3) {
+    if (packet_ethertype) {
+        /* This is a layer 3 packet */
+        buf.md.packet_ethertype = packet_ethertype;
         buf.l3_ofs = 0;
     }
     flow_extract(&buf, &flow);
@@ -99,6 +101,17 @@  ofp_packet_to_string(const void *data, size_t len, bool is_layer3)
     return ds_cstr(&ds);
 }
 
+/* Returns a string that represents the contents of packet 'p'.  The caller
+ * must free the returned string. */
+char *
+ofp_dp_packet_to_string(const struct dp_packet *p)
+{
+    ovs_assert(!dp_packet_is_l3(p) || ntohs(p->md.packet_ethertype));
+    return ofp_packet_to_string(dp_packet_data(p), dp_packet_size(p),
+                                dp_packet_is_l3(p) ? p->md.packet_ethertype
+                                : htons(0));
+}
+
 static void
 format_hex_arg(struct ds *s, const uint8_t *data, size_t len)
 {
@@ -203,7 +216,7 @@  ofp_print_packet_in(struct ds *string, const struct ofp_header *oh,
 
     if (verbosity > 0) {
         char *packet = ofp_packet_to_string(public->packet,
-                                            public->packet_len, false);
+                                            public->packet_len, htons(0));
         ds_put_cstr(string, packet);
         free(packet);
     }
@@ -239,7 +252,8 @@  ofp_print_packet_out(struct ds *string, const struct ofp_header *oh,
     if (po.buffer_id == UINT32_MAX) {
         ds_put_format(string, " data_len=%"PRIuSIZE, po.packet_len);
         if (verbosity > 0 && po.packet_len > 0) {
-            char *packet = ofp_packet_to_string(po.packet, po.packet_len, false);
+            char *packet = ofp_packet_to_string(po.packet, po.packet_len,
+                                                htons(0));
             ds_put_char(string, '\n');
             ds_put_cstr(string, packet);
             free(packet);
@@ -3508,5 +3522,5 @@  ofp_print(FILE *stream, const void *oh, size_t len, int verbosity)
 void
 ofp_print_packet(FILE *stream, const void *data, size_t len)
 {
-    print_and_free(stream, ofp_packet_to_string(data, len, false));
+    print_and_free(stream, ofp_packet_to_string(data, len, htons(0)));
 }
diff --git a/lib/packets.c b/lib/packets.c
index b9e2182827cb..bdadb34b64e4 100644
--- a/lib/packets.c
+++ b/lib/packets.c
@@ -233,16 +233,24 @@  push_eth(struct dp_packet *packet, const struct eth_addr *dst,
     eh->eth_type = type;
 }
 
-/* Removes Ethernet header, including all VLAN and MPLS headers, from 'packet'.
+/* Removes Ethernet header, including VLAN header, from 'packet'.
  *
  * Previous to calling this function, 'ofpbuf_l3(packet)' must not be NULL */
 void
 pop_eth(struct dp_packet *packet)
 {
+    char *l2_5 = dp_packet_l2_5(packet);;
+    int increment;
+
     ovs_assert(dp_packet_l3(packet) != NULL);
 
-    dp_packet_resize_l2_5(packet, -packet->l3_ofs);
-    dp_packet_set_l2_5(packet, NULL);
+    if (l2_5) {
+        increment = packet->l2_5_ofs;
+    } else {
+        increment = packet->l3_ofs;
+    }
+
+    dp_packet_resize_l2(packet, -increment);
 }
 
 /* Set ethertype of the packet. */