[ovs-dev,v3,1/4] dpif-netlink: Detect Out-Of-Resource condition on a netdev

Message ID 20180710062938.18939-2-sriharsha.basavapatna@broadcom.com
State Superseded
Headers show
Series
  • Support dynamic rebalancing of offloaded flows
Related show

Commit Message

Sriharsha Basavapatna via dev July 10, 2018, 6:29 a.m.
This is the first patch in the patch-set to support dynamic rebalancing
of offloaded flows.

The patch detects OOR condition on a netdev port when ENOSPC error is
returned by TC-Flower while adding a flow rule. A new structure is added
to the netdev called "netdev_hw_info", to store OOR related information
required to perform dynamic offload-rebalancing.

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
---
 lib/dpif-netlink.c    | 22 ++++++++++++++++++----
 lib/flow.c            | 27 +++++++++++++++++++++++++++
 lib/flow.h            |  1 +
 lib/netdev-provider.h |  7 +++++++
 lib/netdev.c          |  2 ++
 5 files changed, 55 insertions(+), 4 deletions(-)

Comments

Ben Pfaff July 10, 2018, 10:03 p.m. | #1
On Tue, Jul 10, 2018 at 11:59:35AM +0530, Sriharsha Basavapatna via dev wrote:
> This is the first patch in the patch-set to support dynamic rebalancing
> of offloaded flows.
> 
> The patch detects OOR condition on a netdev port when ENOSPC error is
> returned by TC-Flower while adding a flow rule. A new structure is added
> to the netdev called "netdev_hw_info", to store OOR related information
> required to perform dynamic offload-rebalancing.
> 
> Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
> Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
> Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
> Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>

Thanks for the patch.

I spent some time adjusting the style to better fit what we usually do
these days in OVS, which puts declarations as close to first use as
practical.  I'm appending the incremental that I'd suggest folding in.

However I noticed a potentially serious bug while doing it.  Before this
patch, the code looked for output actions and took the dst_port from the
last one it found that was a tunnel.  After this patch, it does the same
thing but it also leaks a netdev for every output action other than the
first.  I added a "break;" to mitigate the damage but I guess it's not a
correct solution.

Thanks,

Ben.

--8<--------------------------cut here-------------------------->8--

diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 5a6d53ad5697..ecc1ea5f4455 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -2097,16 +2097,12 @@ parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put)
 {
     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
     const struct dpif_class *dpif_class = dpif->dpif.dpif_class;
-    struct netdev_hw_info *hw_info;
     struct match match;
     odp_port_t in_port;
-    odp_port_t out_port;
     const struct nlattr *nla;
     size_t left;
     struct netdev *dev;
     struct netdev *outdev = NULL;
-    struct netdev *tunnel_netdev = NULL;
-    struct netdev *oor_netdev = NULL;
     struct offload_info info;
     ovs_be16 dst_port = 0;
     int err;
@@ -2137,7 +2133,7 @@ parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put)
         if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
             const struct netdev_tunnel_config *tnl_cfg;
 
-            out_port = nl_attr_get_odp_port(nla);
+            odp_port_t out_port = nl_attr_get_odp_port(nla);
             outdev = netdev_ports_get(out_port, dpif_class);
             if (!outdev) {
                 err = EOPNOTSUPP;
@@ -2147,6 +2143,7 @@ parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put)
             if (tnl_cfg && tnl_cfg->dst_port != 0) {
                 dst_port = tnl_cfg->dst_port;
             }
+            break;
         }
     }
 
@@ -2177,18 +2174,16 @@ parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put)
 
         VLOG_DBG("added flow");
     } else if (err != EEXIST) {
-        if (outdev && dev && (err == ENOSPC)) {
-                tunnel_netdev = flow_get_tunnel_netdev(&match.flow.tunnel);
-                if (tunnel_netdev) {
-                    oor_netdev = tunnel_netdev;
-                } else {
-                    oor_netdev = dev;
-                }
-                hw_info = &oor_netdev->hw_info;
-                hw_info->oor = true;
+        struct netdev *oor_netdev = NULL;
+        if (outdev && err == ENOSPC) {
+            oor_netdev = flow_get_tunnel_netdev(&match.flow.tunnel);
+            if (!oor_netdev) {
+                oor_netdev = dev;
+            }
+            oor_netdev->hw_info.oor = true;
         }
         VLOG_ERR_RL(&rl, "failed to offload flow: %s: %s", ovs_strerror(err),
-                    (oor_netdev ? oor_netdev->name : dev->name));
+                    oor_netdev ? oor_netdev->name : dev->name);
     }
 
 out:
diff --git a/lib/flow.c b/lib/flow.c
index 90a1c0a3aa21..c1191e368419 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -3410,11 +3410,7 @@ flow_limit_vlans(int vlan_limit)
 struct netdev *
 flow_get_tunnel_netdev(struct flow_tnl *tunnel)
 {
-    struct netdev *tunnel_netdev;
-    char iface[IFNAMSIZ];
     struct in6_addr ip6;
-    struct in6_addr gw;
-
     if (tunnel->ip_src) {
         in6_addr_set_mapped_ipv4(&ip6, tunnel->ip_src);
     } else if (ipv6_addr_is_set(&tunnel->ipv6_src)) {
@@ -3423,10 +3419,11 @@ flow_get_tunnel_netdev(struct flow_tnl *tunnel)
         return NULL;
     }
 
+    char iface[IFNAMSIZ];
+    struct in6_addr gw;
     if (!ovs_router_lookup(0, &ip6, iface, NULL, &gw)) {
-        return (NULL);
+        return NULL;
     }
 
-    tunnel_netdev = netdev_from_name(iface);
-    return tunnel_netdev;
+    return netdev_from_name(iface);
 }
diff --git a/lib/netdev.c b/lib/netdev.c
index b17f0563fb3b..3e66158824e9 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -2473,7 +2473,6 @@ netdev_free_custom_stats_counters(struct netdev_custom_stats *custom_stats)
 }
 
 #ifdef __linux__
-
 static void
 netdev_ports_flow_init(void)
 {
Sriharsha Basavapatna via dev July 11, 2018, noon | #2
On Wed, Jul 11, 2018 at 3:33 AM, Ben Pfaff <blp@ovn.org> wrote:
> On Tue, Jul 10, 2018 at 11:59:35AM +0530, Sriharsha Basavapatna via dev wrote:
>> This is the first patch in the patch-set to support dynamic rebalancing
>> of offloaded flows.
>>
>> The patch detects OOR condition on a netdev port when ENOSPC error is
>> returned by TC-Flower while adding a flow rule. A new structure is added
>> to the netdev called "netdev_hw_info", to store OOR related information
>> required to perform dynamic offload-rebalancing.
>>
>> Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
>> Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
>> Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
>> Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
>
> Thanks for the patch.
>
> I spent some time adjusting the style to better fit what we usually do
> these days in OVS, which puts declarations as close to first use as
> practical.  I'm appending the incremental that I'd suggest folding in.
>
> However I noticed a potentially serious bug while doing it.  Before this
> patch, the code looked for output actions and took the dst_port from the
> last one it found that was a tunnel.  After this patch, it does the same
> thing but it also leaks a netdev for every output action other than the
> first.  I added a "break;" to mitigate the damage but I guess it's not a
> correct solution.
>
> Thanks,
>
> Ben.

Hi Ben,

Thanks for catching this issue. This code was based on an earlier assumption
that multiple output actions won't be specified with offloading; and looks
like that assumption is no more valid with the fix:
  "netdev-tc-offloads: Add offloading of multiple outputs"
  (commit: 00a0a011d328dc7a9ef163ab2066abe697bd1490).

I've restored the original code for 'outdev' (along with variable declarations
change that you suggested). I've also removed 'outdev' condition check in our
code while setting OOR, since that is not really needed (we just need
in_port/dev).

Please let me know if you want me to send out the updated patch-set or if I
should wait for other comments.

Thanks,
-Harsha

>
> --8<--------------------------cut here-------------------------->8--
>
> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
> index 5a6d53ad5697..ecc1ea5f4455 100644
> --- a/lib/dpif-netlink.c
> +++ b/lib/dpif-netlink.c
> @@ -2097,16 +2097,12 @@ parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put)
>  {
>      static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>      const struct dpif_class *dpif_class = dpif->dpif.dpif_class;
> -    struct netdev_hw_info *hw_info;
>      struct match match;
>      odp_port_t in_port;
> -    odp_port_t out_port;
>      const struct nlattr *nla;
>      size_t left;
>      struct netdev *dev;
>      struct netdev *outdev = NULL;
> -    struct netdev *tunnel_netdev = NULL;
> -    struct netdev *oor_netdev = NULL;
>      struct offload_info info;
>      ovs_be16 dst_port = 0;
>      int err;
> @@ -2137,7 +2133,7 @@ parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put)
>          if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
>              const struct netdev_tunnel_config *tnl_cfg;
>
> -            out_port = nl_attr_get_odp_port(nla);
> +            odp_port_t out_port = nl_attr_get_odp_port(nla);
>              outdev = netdev_ports_get(out_port, dpif_class);
>              if (!outdev) {
>                  err = EOPNOTSUPP;
> @@ -2147,6 +2143,7 @@ parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put)
>              if (tnl_cfg && tnl_cfg->dst_port != 0) {
>                  dst_port = tnl_cfg->dst_port;
>              }
> +            break;
>          }
>      }
>
> @@ -2177,18 +2174,16 @@ parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put)
>
>          VLOG_DBG("added flow");
>      } else if (err != EEXIST) {
> -        if (outdev && dev && (err == ENOSPC)) {
> -                tunnel_netdev = flow_get_tunnel_netdev(&match.flow.tunnel);
> -                if (tunnel_netdev) {
> -                    oor_netdev = tunnel_netdev;
> -                } else {
> -                    oor_netdev = dev;
> -                }
> -                hw_info = &oor_netdev->hw_info;
> -                hw_info->oor = true;
> +        struct netdev *oor_netdev = NULL;
> +        if (outdev && err == ENOSPC) {
> +            oor_netdev = flow_get_tunnel_netdev(&match.flow.tunnel);
> +            if (!oor_netdev) {
> +                oor_netdev = dev;
> +            }
> +            oor_netdev->hw_info.oor = true;
>          }
>          VLOG_ERR_RL(&rl, "failed to offload flow: %s: %s", ovs_strerror(err),
> -                    (oor_netdev ? oor_netdev->name : dev->name));
> +                    oor_netdev ? oor_netdev->name : dev->name);
>      }
>
>  out:
> diff --git a/lib/flow.c b/lib/flow.c
> index 90a1c0a3aa21..c1191e368419 100644
> --- a/lib/flow.c
> +++ b/lib/flow.c
> @@ -3410,11 +3410,7 @@ flow_limit_vlans(int vlan_limit)
>  struct netdev *
>  flow_get_tunnel_netdev(struct flow_tnl *tunnel)
>  {
> -    struct netdev *tunnel_netdev;
> -    char iface[IFNAMSIZ];
>      struct in6_addr ip6;
> -    struct in6_addr gw;
> -
>      if (tunnel->ip_src) {
>          in6_addr_set_mapped_ipv4(&ip6, tunnel->ip_src);
>      } else if (ipv6_addr_is_set(&tunnel->ipv6_src)) {
> @@ -3423,10 +3419,11 @@ flow_get_tunnel_netdev(struct flow_tnl *tunnel)
>          return NULL;
>      }
>
> +    char iface[IFNAMSIZ];
> +    struct in6_addr gw;
>      if (!ovs_router_lookup(0, &ip6, iface, NULL, &gw)) {
> -        return (NULL);
> +        return NULL;
>      }
>
> -    tunnel_netdev = netdev_from_name(iface);
> -    return tunnel_netdev;
> +    return netdev_from_name(iface);
>  }
> diff --git a/lib/netdev.c b/lib/netdev.c
> index b17f0563fb3b..3e66158824e9 100644
> --- a/lib/netdev.c
> +++ b/lib/netdev.c
> @@ -2473,7 +2473,6 @@ netdev_free_custom_stats_counters(struct netdev_custom_stats *custom_stats)
>  }
>
>  #ifdef __linux__
> -
>  static void
>  netdev_ports_flow_init(void)
>  {
Ben Pfaff July 11, 2018, 3:22 p.m. | #3
On Wed, Jul 11, 2018 at 05:30:13PM +0530, Sriharsha Basavapatna wrote:
> On Wed, Jul 11, 2018 at 3:33 AM, Ben Pfaff <blp@ovn.org> wrote:
> > On Tue, Jul 10, 2018 at 11:59:35AM +0530, Sriharsha Basavapatna via dev wrote:
> >> This is the first patch in the patch-set to support dynamic rebalancing
> >> of offloaded flows.
> >>
> >> The patch detects OOR condition on a netdev port when ENOSPC error is
> >> returned by TC-Flower while adding a flow rule. A new structure is added
> >> to the netdev called "netdev_hw_info", to store OOR related information
> >> required to perform dynamic offload-rebalancing.
> >>
> >> Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
> >> Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
> >> Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
> >> Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
> >
> > Thanks for the patch.
> >
> > I spent some time adjusting the style to better fit what we usually do
> > these days in OVS, which puts declarations as close to first use as
> > practical.  I'm appending the incremental that I'd suggest folding in.
> >
> > However I noticed a potentially serious bug while doing it.  Before this
> > patch, the code looked for output actions and took the dst_port from the
> > last one it found that was a tunnel.  After this patch, it does the same
> > thing but it also leaks a netdev for every output action other than the
> > first.  I added a "break;" to mitigate the damage but I guess it's not a
> > correct solution.
> >
> > Thanks,
> >
> > Ben.
> 
> Hi Ben,
> 
> Thanks for catching this issue. This code was based on an earlier assumption
> that multiple output actions won't be specified with offloading; and looks
> like that assumption is no more valid with the fix:
>   "netdev-tc-offloads: Add offloading of multiple outputs"
>   (commit: 00a0a011d328dc7a9ef163ab2066abe697bd1490).
> 
> I've restored the original code for 'outdev' (along with variable declarations
> change that you suggested). I've also removed 'outdev' condition check in our
> code while setting OOR, since that is not really needed (we just need
> in_port/dev).
> 
> Please let me know if you want me to send out the updated patch-set or if I
> should wait for other comments.

Please send the revised patches.

Thanks,

Ben.

Patch

diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index aa9bbd946..5a6d53ad5 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -2097,11 +2097,16 @@  parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put)
 {
     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
     const struct dpif_class *dpif_class = dpif->dpif.dpif_class;
+    struct netdev_hw_info *hw_info;
     struct match match;
     odp_port_t in_port;
+    odp_port_t out_port;
     const struct nlattr *nla;
     size_t left;
     struct netdev *dev;
+    struct netdev *outdev = NULL;
+    struct netdev *tunnel_netdev = NULL;
+    struct netdev *oor_netdev = NULL;
     struct offload_info info;
     ovs_be16 dst_port = 0;
     int err;
@@ -2131,8 +2136,6 @@  parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put)
     NL_ATTR_FOR_EACH(nla, left, put->actions, put->actions_len) {
         if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
             const struct netdev_tunnel_config *tnl_cfg;
-            struct netdev *outdev;
-            odp_port_t out_port;
 
             out_port = nl_attr_get_odp_port(nla);
             outdev = netdev_ports_get(out_port, dpif_class);
@@ -2144,7 +2147,6 @@  parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put)
             if (tnl_cfg && tnl_cfg->dst_port != 0) {
                 dst_port = tnl_cfg->dst_port;
             }
-            netdev_close(outdev);
         }
     }
 
@@ -2175,7 +2177,18 @@  parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put)
 
         VLOG_DBG("added flow");
     } else if (err != EEXIST) {
-        VLOG_ERR_RL(&rl, "failed to offload flow: %s", ovs_strerror(err));
+        if (outdev && dev && (err == ENOSPC)) {
+                tunnel_netdev = flow_get_tunnel_netdev(&match.flow.tunnel);
+                if (tunnel_netdev) {
+                    oor_netdev = tunnel_netdev;
+                } else {
+                    oor_netdev = dev;
+                }
+                hw_info = &oor_netdev->hw_info;
+                hw_info->oor = true;
+        }
+        VLOG_ERR_RL(&rl, "failed to offload flow: %s: %s", ovs_strerror(err),
+                    (oor_netdev ? oor_netdev->name : dev->name));
     }
 
 out:
@@ -2196,6 +2209,7 @@  out:
         }
     }
 
+    netdev_close(outdev);
     netdev_close(dev);
 
     return err;
diff --git a/lib/flow.c b/lib/flow.c
index 75ca45672..912afc99d 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -19,6 +19,7 @@ 
 #include <errno.h>
 #include <inttypes.h>
 #include <limits.h>
+#include <net/if.h>
 #include <netinet/in.h>
 #include <netinet/icmp6.h>
 #include <netinet/ip6.h>
@@ -41,6 +42,8 @@ 
 #include "unaligned.h"
 #include "util.h"
 #include "openvswitch/nsh.h"
+#include "ovs-router.h"
+#include "lib/netdev-provider.h"
 
 COVERAGE_DEFINE(flow_extract);
 COVERAGE_DEFINE(miniflow_malloc);
@@ -3299,3 +3302,27 @@  flow_limit_vlans(int vlan_limit)
         flow_vlan_limit = MIN(vlan_limit, FLOW_MAX_VLAN_HEADERS);
     }
 }
+
+struct netdev *
+flow_get_tunnel_netdev(struct flow_tnl *tunnel)
+{
+    struct netdev *tunnel_netdev;
+    char iface[IFNAMSIZ];
+    struct in6_addr ip6;
+    struct in6_addr gw;
+
+    if (tunnel->ip_src) {
+        in6_addr_set_mapped_ipv4(&ip6, tunnel->ip_src);
+    } else if (ipv6_addr_is_set(&tunnel->ipv6_src)) {
+        ip6 = tunnel->ipv6_src;
+    } else {
+        return NULL;
+    }
+
+    if (!ovs_router_lookup(0, &ip6, iface, NULL, &gw)) {
+        return (NULL);
+    }
+
+    tunnel_netdev = netdev_from_name(iface);
+    return tunnel_netdev;
+}
diff --git a/lib/flow.h b/lib/flow.h
index 5b6585f11..a67abc9c9 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -73,6 +73,7 @@  void flow_extract(struct dp_packet *, struct flow *);
 void flow_zero_wildcards(struct flow *, const struct flow_wildcards *);
 void flow_unwildcard_tp_ports(const struct flow *, struct flow_wildcards *);
 void flow_get_metadata(const struct flow *, struct match *flow_metadata);
+struct netdev *flow_get_tunnel_netdev(struct flow_tnl *tunnel);
 
 const char *ct_state_to_string(uint32_t state);
 uint32_t ct_state_from_string(const char *);
diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
index 1a572f5b8..62e05619e 100644
--- a/lib/netdev-provider.h
+++ b/lib/netdev-provider.h
@@ -35,6 +35,11 @@  extern "C" {
 struct netdev_tnl_build_header_params;
 #define NETDEV_NUMA_UNSPEC OVS_NUMA_UNSPEC
 
+/* Offload-capable (HW) netdev information */
+struct netdev_hw_info {
+    bool oor;		/* Out of Offload Resources ? */
+};
+
 /* A network device (e.g. an Ethernet device).
  *
  * Network device implementations may read these members but should not modify
@@ -80,6 +85,8 @@  struct netdev {
     int n_rxq;
     struct shash_node *node;            /* Pointer to element in global map. */
     struct ovs_list saved_flags_list; /* Contains "struct netdev_saved_flags". */
+
+    struct netdev_hw_info hw_info;	/* offload-capable netdev info */
 };
 
 static inline void
diff --git a/lib/netdev.c b/lib/netdev.c
index 82ffeb901..b17f0563f 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -415,6 +415,7 @@  netdev_open(const char *name, const char *type, struct netdev **netdevp)
                 netdev->reconfigure_seq = seq_create();
                 netdev->last_reconfigure_seq =
                     seq_read(netdev->reconfigure_seq);
+                netdev->hw_info.oor = false;
                 netdev->node = shash_add(&netdev_shash, name, netdev);
 
                 /* By default enable one tx and rx queue per netdev. */
@@ -2472,6 +2473,7 @@  netdev_free_custom_stats_counters(struct netdev_custom_stats *custom_stats)
 }
 
 #ifdef __linux__
+
 static void
 netdev_ports_flow_init(void)
 {