diff mbox series

[ovs-dev,6/6] netdev-linux: monitor and offload LAG slaves to TC

Message ID 1529588161-15934-7-git-send-email-john.hurley@netronome.com
State Superseded
Headers show
Series offload Linux LAG devices to the TC datapath | expand

Commit Message

John Hurley June 21, 2018, 1:36 p.m. UTC
A LAG slave cannot be added directly to an OvS bridge, nor can a OvS
bridge port be added to a LAG dev. However, LAG masters can be added to
OvS.

Use TC blocks to indirectly offload slaves when their master is attached
as a linux-netdev to an OvS bridge. In the kernel TC datapath, blocks link
together netdevs in a similar way to LAG devices. For example, if a filter
is added to a block then it is added to all block devices, or if stats are
incremented on 1 device then the stats on the entire block are incremented.
This mimics LAG devices in that if a rule is applied to the LAG master
then it should be applied to all slaves etc.

Monitor LAG slaves via the netlink socket in netdev-linux and, if their
master is attached to the OvS bridge and has a block id, add the slave's
qdisc to the same block. Similarly, if a slave is freed from a master,
remove the qdisc from the masters block.

Signed-off-by: John Hurley <john.hurley@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
---
 lib/netdev-linux.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)

Comments

Roi Dayan June 26, 2018, 5:35 a.m. UTC | #1
On 21/06/2018 16:36, John Hurley wrote:
> A LAG slave cannot be added directly to an OvS bridge, nor can a OvS
> bridge port be added to a LAG dev. However, LAG masters can be added to
> OvS.
> 
> Use TC blocks to indirectly offload slaves when their master is attached
> as a linux-netdev to an OvS bridge. In the kernel TC datapath, blocks link
> together netdevs in a similar way to LAG devices. For example, if a filter
> is added to a block then it is added to all block devices, or if stats are
> incremented on 1 device then the stats on the entire block are incremented.
> This mimics LAG devices in that if a rule is applied to the LAG master
> then it should be applied to all slaves etc.
> 
> Monitor LAG slaves via the netlink socket in netdev-linux and, if their
> master is attached to the OvS bridge and has a block id, add the slave's
> qdisc to the same block. Similarly, if a slave is freed from a master,
> remove the qdisc from the masters block.
> 
> Signed-off-by: John Hurley <john.hurley@netronome.com>
> Reviewed-by: Simon Horman <simon.horman@netronome.com>
> Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
> ---
>  lib/netdev-linux.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 69 insertions(+)
> 
> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> index 81de3b1..f2b1dfd 100644
> --- a/lib/netdev-linux.c
> +++ b/lib/netdev-linux.c
> @@ -233,6 +233,18 @@ enum {
>      VALID_FEATURES          = 1 << 7,
>  };
>  
> +struct linux_lag_slave {
> +   uint32_t block_id;
> +   struct shash_node *node;
> +};
> +
> +/* Protects 'lag_shash' and the mutable members of struct linux_lag_slave. */
> +static struct ovs_mutex lag_mutex = OVS_MUTEX_INITIALIZER;
> +
> +/* All slaves whose LAG masters are network devices in OvS. */
> +static struct shash lag_shash OVS_GUARDED_BY(lag_mutex)
> +    = SHASH_INITIALIZER(&lag_shash);
> +
>  /* Traffic control. */
>  
>  /* An instance of a traffic control class.  Always associated with a particular
> @@ -692,6 +704,57 @@ netdev_linux_kind_is_lag(const char *kind)
>  }
>  
>  static void
> +netdev_linux_update_lag(struct rtnetlink_change *change)
> +    OVS_REQUIRES(lag_mutex)
> +{
> +    struct linux_lag_slave *lag;
> +
> +    if (change->slave && netdev_linux_kind_is_lag(change->slave)) {
> +        lag = shash_find_data(&lag_shash, change->ifname);
> +
> +        if (!lag) {
> +            struct netdev *master_netdev;
> +            char master_name[IFNAMSIZ];
> +            uint32_t block_id;
> +            int error = 0;
> +
> +            if_indextoname(change->master_ifindex, master_name);
> +            master_netdev = netdev_from_name(master_name);
> +
> +            if (is_netdev_linux_class(master_netdev->netdev_class)) {
> +                block_id = netdev_get_block_id(master_netdev);
> +                if (!block_id) {
> +                   return;
> +                }
> +
> +                lag = xmalloc(sizeof *lag);
> +                lag->block_id = block_id;
> +                lag->node = shash_add(&lag_shash, change->ifname, lag);
> +
> +                /* LAG master is linux netdev so add slave to same block. */
> +                error = tc_add_del_ingress_qdisc(change->if_index, true,
> +                                                 block_id);
> +                if (error) {
> +                    VLOG_WARN("failed to bind LAG slave to master's block");
> +                    shash_delete(&lag_shash, lag->node);
> +                    free(lag);
> +                }
> +            }
> +        }
> +    } else if (change->master_ifindex == 0) {
> +        /* Check if this was a lag slave that has been freed. */
> +        lag = shash_find_data(&lag_shash, change->ifname);

hi,

I can reproduce a crash here when playing with devices in namespaces.
I get into netdev_linux_run() with a device that I put in a namespace.
In netdev_linux_run(), it looks like if_indextoname() returnns null so
change.ifname is null and so is netdev_.
Because of this we go into netdev_linux_update_lag() with change->ifname as null.
shash_find_data() segfaults because it cannot accept null.

Maybe in the caller instead of checking only !netdev_ also change.ifname is not null.
as change.ifname is needed for both if-cases.


> +
> +        if (lag) {
> +            tc_add_del_ingress_qdisc(change->if_index, false,
> +                                     lag->block_id);
> +            shash_delete(&lag_shash, lag->node);
> +            free(lag);
> +        }
> +    }
> +}
> +
> +static void
>  netdev_linux_run(const struct netdev_class *netdev_class OVS_UNUSED)
>  {
>      struct nl_sock *sock;
> @@ -734,6 +797,12 @@ netdev_linux_run(const struct netdev_class *netdev_class OVS_UNUSED)
>                      netdev_linux_update(netdev, nsid, &change);
>                      ovs_mutex_unlock(&netdev->mutex);
>                  }
> +                else if (!netdev_) {
> +                    /* Netdev is not present in OvS but its master could be. */
> +                    ovs_mutex_lock(&lag_mutex);
> +                    netdev_linux_update_lag(&change);
> +                    ovs_mutex_unlock(&lag_mutex);
> +                }
>                  netdev_close(netdev_);
>              }
>          } else if (error == ENOBUFS) {
>
John Hurley June 26, 2018, 9:24 a.m. UTC | #2
On Tue, Jun 26, 2018 at 6:35 AM, Roi Dayan <roid@mellanox.com> wrote:
>
>
> On 21/06/2018 16:36, John Hurley wrote:
>> A LAG slave cannot be added directly to an OvS bridge, nor can a OvS
>> bridge port be added to a LAG dev. However, LAG masters can be added to
>> OvS.
>>
>> Use TC blocks to indirectly offload slaves when their master is attached
>> as a linux-netdev to an OvS bridge. In the kernel TC datapath, blocks link
>> together netdevs in a similar way to LAG devices. For example, if a filter
>> is added to a block then it is added to all block devices, or if stats are
>> incremented on 1 device then the stats on the entire block are incremented.
>> This mimics LAG devices in that if a rule is applied to the LAG master
>> then it should be applied to all slaves etc.
>>
>> Monitor LAG slaves via the netlink socket in netdev-linux and, if their
>> master is attached to the OvS bridge and has a block id, add the slave's
>> qdisc to the same block. Similarly, if a slave is freed from a master,
>> remove the qdisc from the masters block.
>>
>> Signed-off-by: John Hurley <john.hurley@netronome.com>
>> Reviewed-by: Simon Horman <simon.horman@netronome.com>
>> Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
>> ---
>>  lib/netdev-linux.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 69 insertions(+)
>>
>> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
>> index 81de3b1..f2b1dfd 100644
>> --- a/lib/netdev-linux.c
>> +++ b/lib/netdev-linux.c
>> @@ -233,6 +233,18 @@ enum {
>>      VALID_FEATURES          = 1 << 7,
>>  };
>>
>> +struct linux_lag_slave {
>> +   uint32_t block_id;
>> +   struct shash_node *node;
>> +};
>> +
>> +/* Protects 'lag_shash' and the mutable members of struct linux_lag_slave. */
>> +static struct ovs_mutex lag_mutex = OVS_MUTEX_INITIALIZER;
>> +
>> +/* All slaves whose LAG masters are network devices in OvS. */
>> +static struct shash lag_shash OVS_GUARDED_BY(lag_mutex)
>> +    = SHASH_INITIALIZER(&lag_shash);
>> +
>>  /* Traffic control. */
>>
>>  /* An instance of a traffic control class.  Always associated with a particular
>> @@ -692,6 +704,57 @@ netdev_linux_kind_is_lag(const char *kind)
>>  }
>>
>>  static void
>> +netdev_linux_update_lag(struct rtnetlink_change *change)
>> +    OVS_REQUIRES(lag_mutex)
>> +{
>> +    struct linux_lag_slave *lag;
>> +
>> +    if (change->slave && netdev_linux_kind_is_lag(change->slave)) {
>> +        lag = shash_find_data(&lag_shash, change->ifname);
>> +
>> +        if (!lag) {
>> +            struct netdev *master_netdev;
>> +            char master_name[IFNAMSIZ];
>> +            uint32_t block_id;
>> +            int error = 0;
>> +
>> +            if_indextoname(change->master_ifindex, master_name);
>> +            master_netdev = netdev_from_name(master_name);
>> +
>> +            if (is_netdev_linux_class(master_netdev->netdev_class)) {
>> +                block_id = netdev_get_block_id(master_netdev);
>> +                if (!block_id) {
>> +                   return;
>> +                }
>> +
>> +                lag = xmalloc(sizeof *lag);
>> +                lag->block_id = block_id;
>> +                lag->node = shash_add(&lag_shash, change->ifname, lag);
>> +
>> +                /* LAG master is linux netdev so add slave to same block. */
>> +                error = tc_add_del_ingress_qdisc(change->if_index, true,
>> +                                                 block_id);
>> +                if (error) {
>> +                    VLOG_WARN("failed to bind LAG slave to master's block");
>> +                    shash_delete(&lag_shash, lag->node);
>> +                    free(lag);
>> +                }
>> +            }
>> +        }
>> +    } else if (change->master_ifindex == 0) {
>> +        /* Check if this was a lag slave that has been freed. */
>> +        lag = shash_find_data(&lag_shash, change->ifname);
>
> hi,
>
> I can reproduce a crash here when playing with devices in namespaces.
> I get into netdev_linux_run() with a device that I put in a namespace.
> In netdev_linux_run(), it looks like if_indextoname() returnns null so
> change.ifname is null and so is netdev_.
> Because of this we go into netdev_linux_update_lag() with change->ifname as null.
> shash_find_data() segfaults because it cannot accept null.
>
> Maybe in the caller instead of checking only !netdev_ also change.ifname is not null.
> as change.ifname is needed for both if-cases.
>

Hi Roi,

Thanks for testing/reporting this.
Your explanation seems to make sense.
Let me reproduce and test the proposed fix.

Thanks,
John

>
>> +
>> +        if (lag) {
>> +            tc_add_del_ingress_qdisc(change->if_index, false,
>> +                                     lag->block_id);
>> +            shash_delete(&lag_shash, lag->node);
>> +            free(lag);
>> +        }
>> +    }
>> +}
>> +
>> +static void
>>  netdev_linux_run(const struct netdev_class *netdev_class OVS_UNUSED)
>>  {
>>      struct nl_sock *sock;
>> @@ -734,6 +797,12 @@ netdev_linux_run(const struct netdev_class *netdev_class OVS_UNUSED)
>>                      netdev_linux_update(netdev, nsid, &change);
>>                      ovs_mutex_unlock(&netdev->mutex);
>>                  }
>> +                else if (!netdev_) {
>> +                    /* Netdev is not present in OvS but its master could be. */
>> +                    ovs_mutex_lock(&lag_mutex);
>> +                    netdev_linux_update_lag(&change);
>> +                    ovs_mutex_unlock(&lag_mutex);
>> +                }
>>                  netdev_close(netdev_);
>>              }
>>          } else if (error == ENOBUFS) {
>>
diff mbox series

Patch

diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 81de3b1..f2b1dfd 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -233,6 +233,18 @@  enum {
     VALID_FEATURES          = 1 << 7,
 };
 
+struct linux_lag_slave {
+   uint32_t block_id;
+   struct shash_node *node;
+};
+
+/* Protects 'lag_shash' and the mutable members of struct linux_lag_slave. */
+static struct ovs_mutex lag_mutex = OVS_MUTEX_INITIALIZER;
+
+/* All slaves whose LAG masters are network devices in OvS. */
+static struct shash lag_shash OVS_GUARDED_BY(lag_mutex)
+    = SHASH_INITIALIZER(&lag_shash);
+
 /* Traffic control. */
 
 /* An instance of a traffic control class.  Always associated with a particular
@@ -692,6 +704,57 @@  netdev_linux_kind_is_lag(const char *kind)
 }
 
 static void
+netdev_linux_update_lag(struct rtnetlink_change *change)
+    OVS_REQUIRES(lag_mutex)
+{
+    struct linux_lag_slave *lag;
+
+    if (change->slave && netdev_linux_kind_is_lag(change->slave)) {
+        lag = shash_find_data(&lag_shash, change->ifname);
+
+        if (!lag) {
+            struct netdev *master_netdev;
+            char master_name[IFNAMSIZ];
+            uint32_t block_id;
+            int error = 0;
+
+            if_indextoname(change->master_ifindex, master_name);
+            master_netdev = netdev_from_name(master_name);
+
+            if (is_netdev_linux_class(master_netdev->netdev_class)) {
+                block_id = netdev_get_block_id(master_netdev);
+                if (!block_id) {
+                   return;
+                }
+
+                lag = xmalloc(sizeof *lag);
+                lag->block_id = block_id;
+                lag->node = shash_add(&lag_shash, change->ifname, lag);
+
+                /* LAG master is linux netdev so add slave to same block. */
+                error = tc_add_del_ingress_qdisc(change->if_index, true,
+                                                 block_id);
+                if (error) {
+                    VLOG_WARN("failed to bind LAG slave to master's block");
+                    shash_delete(&lag_shash, lag->node);
+                    free(lag);
+                }
+            }
+        }
+    } else if (change->master_ifindex == 0) {
+        /* Check if this was a lag slave that has been freed. */
+        lag = shash_find_data(&lag_shash, change->ifname);
+
+        if (lag) {
+            tc_add_del_ingress_qdisc(change->if_index, false,
+                                     lag->block_id);
+            shash_delete(&lag_shash, lag->node);
+            free(lag);
+        }
+    }
+}
+
+static void
 netdev_linux_run(const struct netdev_class *netdev_class OVS_UNUSED)
 {
     struct nl_sock *sock;
@@ -734,6 +797,12 @@  netdev_linux_run(const struct netdev_class *netdev_class OVS_UNUSED)
                     netdev_linux_update(netdev, nsid, &change);
                     ovs_mutex_unlock(&netdev->mutex);
                 }
+                else if (!netdev_) {
+                    /* Netdev is not present in OvS but its master could be. */
+                    ovs_mutex_lock(&lag_mutex);
+                    netdev_linux_update_lag(&change);
+                    ovs_mutex_unlock(&lag_mutex);
+                }
                 netdev_close(netdev_);
             }
         } else if (error == ENOBUFS) {