diff mbox

[ovs-dev,1/2] ofproto/bond: Fix bond reconfiguration race condition.

Message ID 1487885501-105263-1-git-send-email-azhou@ovn.org
State Accepted
Headers show

Commit Message

Andy Zhou Feb. 23, 2017, 9:31 p.m. UTC
During the upcall thread bond output translation, bond_may_recirc()
is currently called outside the lock. In case the main thread executes
bond_reconfigure() at the same time, the upcall thread may find bond
state to be inconsistent when calling bond_update_post_recirc_rules().

This patch fixes the race condition by acquiring the write lock
before calling bond_may_recirc(). The APIs are refactored slightly.

The race condition can result in the following stack trace. Copied
from 'Reported-at':

    Thread 23 handler69:
    Invalid write of size 8
        update_recirc_rules (bond.c:385)
        bond_update_post_recirc_rules__ (bond.c:952)
        bond_update_post_recirc_rules (bond.c:960)
        output_normal (ofproto-dpif-xlate.c:2102)
        xlate_normal (ofproto-dpif-xlate.c:2858)
        xlate_output_action (ofproto-dpif-xlate.c:4407)
        do_xlate_actions (ofproto-dpif-xlate.c:5335)
        xlate_actions (ofproto-dpif-xlate.c:6198)
        upcall_xlate (ofproto-dpif-upcall.c:1129)
        process_upcall (ofproto-dpif-upcall.c:1271)
        recv_upcalls (ofproto-dpif-upcall.c:822)
        udpif_upcall_handler (ofproto-dpif-upcall.c:740)
    Address 0x18630490 is 1,904 bytes inside a block of size 12,288 free'd
        free (vg_replace_malloc.c:529)
        bond_entry_reset (bond.c:1635)
        bond_reconfigure (bond.c:457)
        bundle_set (ofproto-dpif.c:2896)
        ofproto_bundle_register (ofproto.c:1343)
        port_configure (bridge.c:1159)
        bridge_reconfigure (bridge.c:785)
        bridge_run (bridge.c:3099)
        main (ovs-vswitchd.c:111)
    Block was alloc'd at
        malloc (vg_replace_malloc.c:298)
        xmalloc (util.c:110)
        bond_entry_reset (bond.c:1629)
        bond_reconfigure (bond.c:457)
        bond_create (bond.c:245)
        bundle_set (ofproto-dpif.c:2900)
        ofproto_bundle_register (ofproto.c:1343)
        port_configure (bridge.c:1159)
        bridge_reconfigure (bridge.c:785)
        bridge_run (bridge.c:3099)
        main (ovs-vswitchd.c:111)

Reported-by: Huanle Han <hanxueluo@gmail.com>
Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2017-February/328969.html
CC: Huanle Han <hanxueluo@gmail.com>
Signed-off-by: Andy Zhou <azhou@ovn.org>
---
 ofproto/bond.c               | 27 +++++++++++++++------------
 ofproto/bond.h               |  3 ++-
 ofproto/ofproto-dpif-xlate.c | 24 ++++++++++++++++--------
 3 files changed, 33 insertions(+), 21 deletions(-)

Comments

Jarno Rajahalme Feb. 23, 2017, 9:56 p.m. UTC | #1
LGTM,

Acked-by: Jarno Rajahalme <jarno@ovn.org>

> On Feb 23, 2017, at 1:31 PM, Andy Zhou <azhou@ovn.org> wrote:
> 
> During the upcall thread bond output translation, bond_may_recirc()
> is currently called outside the lock. In case the main thread executes
> bond_reconfigure() at the same time, the upcall thread may find bond
> state to be inconsistent when calling bond_update_post_recirc_rules().
> 
> This patch fixes the race condition by acquiring the write lock
> before calling bond_may_recirc(). The APIs are refactored slightly.
> 
> The race condition can result in the following stack trace. Copied
> from 'Reported-at':
> 
>    Thread 23 handler69:
>    Invalid write of size 8
>        update_recirc_rules (bond.c:385)
>        bond_update_post_recirc_rules__ (bond.c:952)
>        bond_update_post_recirc_rules (bond.c:960)
>        output_normal (ofproto-dpif-xlate.c:2102)
>        xlate_normal (ofproto-dpif-xlate.c:2858)
>        xlate_output_action (ofproto-dpif-xlate.c:4407)
>        do_xlate_actions (ofproto-dpif-xlate.c:5335)
>        xlate_actions (ofproto-dpif-xlate.c:6198)
>        upcall_xlate (ofproto-dpif-upcall.c:1129)
>        process_upcall (ofproto-dpif-upcall.c:1271)
>        recv_upcalls (ofproto-dpif-upcall.c:822)
>        udpif_upcall_handler (ofproto-dpif-upcall.c:740)
>    Address 0x18630490 is 1,904 bytes inside a block of size 12,288 free'd
>        free (vg_replace_malloc.c:529)
>        bond_entry_reset (bond.c:1635)
>        bond_reconfigure (bond.c:457)
>        bundle_set (ofproto-dpif.c:2896)
>        ofproto_bundle_register (ofproto.c:1343)
>        port_configure (bridge.c:1159)
>        bridge_reconfigure (bridge.c:785)
>        bridge_run (bridge.c:3099)
>        main (ovs-vswitchd.c:111)
>    Block was alloc'd at
>        malloc (vg_replace_malloc.c:298)
>        xmalloc (util.c:110)
>        bond_entry_reset (bond.c:1629)
>        bond_reconfigure (bond.c:457)
>        bond_create (bond.c:245)
>        bundle_set (ofproto-dpif.c:2900)
>        ofproto_bundle_register (ofproto.c:1343)
>        port_configure (bridge.c:1159)
>        bridge_reconfigure (bridge.c:785)
>        bridge_run (bridge.c:3099)
>        main (ovs-vswitchd.c:111)
> 
> Reported-by: Huanle Han <hanxueluo@gmail.com>
> Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2017-February/328969.html
> CC: Huanle Han <hanxueluo@gmail.com>
> Signed-off-by: Andy Zhou <azhou@ovn.org>
> ---
> ofproto/bond.c               | 27 +++++++++++++++------------
> ofproto/bond.h               |  3 ++-
> ofproto/ofproto-dpif-xlate.c | 24 ++++++++++++++++--------
> 3 files changed, 33 insertions(+), 21 deletions(-)
> 
> diff --git a/ofproto/bond.c b/ofproto/bond.c
> index 260023e4bb64..6e10c5143c0e 100644
> --- a/ofproto/bond.c
> +++ b/ofproto/bond.c
> @@ -916,17 +916,16 @@ bool
> bond_may_recirc(const struct bond *bond, uint32_t *recirc_id,
>                 uint32_t *hash_bias)
> {
> -    if (bond->balance == BM_TCP && bond->recirc_id) {
> -        if (recirc_id) {
> -            *recirc_id = bond->recirc_id;
> -        }
> -        if (hash_bias) {
> -            *hash_bias = bond->basis;
> -        }
> -        return true;
> -    } else {
> -        return false;
> +    bool may_recirc = bond->balance == BM_TCP && bond->recirc_id;
> +
> +    if (recirc_id) {
> +        *recirc_id = may_recirc ? bond->recirc_id : 0;
>     }
> +    if (hash_bias) {
> +        *hash_bias = may_recirc ? bond->basis : 0;
> +    }
> +
> +    return may_recirc;
> }
> 
> static void
> @@ -954,12 +953,16 @@ bond_update_post_recirc_rules__(struct bond* bond, const bool force)
> }
> 
> void
> -bond_update_post_recirc_rules(struct bond* bond, const bool force)
> +bond_update_post_recirc_rules(struct bond *bond, uint32_t *recirc_id,
> +                              uint32_t *hash_basis)
> {
>     ovs_rwlock_wrlock(&rwlock);
> -    bond_update_post_recirc_rules__(bond, force);
> +    if (bond_may_recirc(bond, recirc_id, hash_basis)) {
> +        bond_update_post_recirc_rules__(bond, false);
> +    }
>     ovs_rwlock_unlock(&rwlock);
> }
> +
> 
> /* Rebalancing. */
> 
> diff --git a/ofproto/bond.h b/ofproto/bond.h
> index 9a5ea9e21040..6e1221d2381b 100644
> --- a/ofproto/bond.h
> +++ b/ofproto/bond.h
> @@ -120,7 +120,8 @@ void bond_rebalance(struct bond *);
>  * Bond module pulls stats from those post recirculation rules. If rebalancing
>  * is needed, those rules are updated with new output actions.
> */
> -void bond_update_post_recirc_rules(struct bond *, const bool force);
> +void bond_update_post_recirc_rules(struct bond *, uint32_t *recirc_id,
> +                                   uint32_t *hash_basis);
> bool bond_may_recirc(const struct bond *, uint32_t *recirc_id,
>                      uint32_t *hash_bias);
> #endif /* bond.h */
> diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
> index 503a347fc98f..89fc3a44a0d1 100644
> --- a/ofproto/ofproto-dpif-xlate.c
> +++ b/ofproto/ofproto-dpif-xlate.c
> @@ -1987,15 +1987,23 @@ output_normal(struct xlate_ctx *ctx, const struct xbundle *out_xbundle,
>         struct flow_wildcards *wc = ctx->wc;
>         struct ofport_dpif *ofport;
> 
> -        if (ctx->xbridge->support.odp.recirc) {
> -            use_recirc = bond_may_recirc(
> -                out_xbundle->bond, &xr.recirc_id, &xr.hash_basis);
> -
> -            if (use_recirc) {
> -                /* Only TCP mode uses recirculation. */
> +        if (ctx->xbridge->support.odp.recirc
> +            && bond_may_recirc(out_xbundle->bond, NULL, NULL)) {
> +            /* To avoid unnecessary locking, bond_may_recirc() is first
> +             * called outside of the 'rwlock'. After acquiring the lock,
> +             * bond_update_post_recirc_rules() will check again to make
> +             * sure bond configuration has not been changed.
> +             *
> +             * In case recirculation is not actually in use, 'xr.recirc_id'
> +             * will be set to '0', Since a valid 'recirc_id' can
> +             * not be zero.  */
> +            bond_update_post_recirc_rules(out_xbundle->bond,
> +                                          &xr.recirc_id,
> +                                          &xr.hash_basis);
> +            if (xr.recirc_id) {
> +                /* Use recirculation instead of output. */
> +                use_recirc = true;
>                 xr.hash_alg = OVS_HASH_ALG_L4;
> -                bond_update_post_recirc_rules(out_xbundle->bond, false);
> -
>                 /* Recirculation does not require unmasking hash fields. */
>                 wc = NULL;
>             }
> -- 
> 1.9.1
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Andy Zhou Feb. 24, 2017, 10:39 p.m. UTC | #2
On Thu, Feb 23, 2017 at 1:56 PM, Jarno Rajahalme <jarno@ovn.org> wrote:
> LGTM,
>
> Acked-by: Jarno Rajahalme <jarno@ovn.org>

Pushed both patches to master and backported to branch 2.7, 2.6 and
2.5.  Thanks!
diff mbox

Patch

diff --git a/ofproto/bond.c b/ofproto/bond.c
index 260023e4bb64..6e10c5143c0e 100644
--- a/ofproto/bond.c
+++ b/ofproto/bond.c
@@ -916,17 +916,16 @@  bool
 bond_may_recirc(const struct bond *bond, uint32_t *recirc_id,
                 uint32_t *hash_bias)
 {
-    if (bond->balance == BM_TCP && bond->recirc_id) {
-        if (recirc_id) {
-            *recirc_id = bond->recirc_id;
-        }
-        if (hash_bias) {
-            *hash_bias = bond->basis;
-        }
-        return true;
-    } else {
-        return false;
+    bool may_recirc = bond->balance == BM_TCP && bond->recirc_id;
+
+    if (recirc_id) {
+        *recirc_id = may_recirc ? bond->recirc_id : 0;
     }
+    if (hash_bias) {
+        *hash_bias = may_recirc ? bond->basis : 0;
+    }
+
+    return may_recirc;
 }
 
 static void
@@ -954,12 +953,16 @@  bond_update_post_recirc_rules__(struct bond* bond, const bool force)
 }
 
 void
-bond_update_post_recirc_rules(struct bond* bond, const bool force)
+bond_update_post_recirc_rules(struct bond *bond, uint32_t *recirc_id,
+                              uint32_t *hash_basis)
 {
     ovs_rwlock_wrlock(&rwlock);
-    bond_update_post_recirc_rules__(bond, force);
+    if (bond_may_recirc(bond, recirc_id, hash_basis)) {
+        bond_update_post_recirc_rules__(bond, false);
+    }
     ovs_rwlock_unlock(&rwlock);
 }
+
 
 /* Rebalancing. */
 
diff --git a/ofproto/bond.h b/ofproto/bond.h
index 9a5ea9e21040..6e1221d2381b 100644
--- a/ofproto/bond.h
+++ b/ofproto/bond.h
@@ -120,7 +120,8 @@  void bond_rebalance(struct bond *);
  * Bond module pulls stats from those post recirculation rules. If rebalancing
  * is needed, those rules are updated with new output actions.
 */
-void bond_update_post_recirc_rules(struct bond *, const bool force);
+void bond_update_post_recirc_rules(struct bond *, uint32_t *recirc_id,
+                                   uint32_t *hash_basis);
 bool bond_may_recirc(const struct bond *, uint32_t *recirc_id,
                      uint32_t *hash_bias);
 #endif /* bond.h */
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index 503a347fc98f..89fc3a44a0d1 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -1987,15 +1987,23 @@  output_normal(struct xlate_ctx *ctx, const struct xbundle *out_xbundle,
         struct flow_wildcards *wc = ctx->wc;
         struct ofport_dpif *ofport;
 
-        if (ctx->xbridge->support.odp.recirc) {
-            use_recirc = bond_may_recirc(
-                out_xbundle->bond, &xr.recirc_id, &xr.hash_basis);
-
-            if (use_recirc) {
-                /* Only TCP mode uses recirculation. */
+        if (ctx->xbridge->support.odp.recirc
+            && bond_may_recirc(out_xbundle->bond, NULL, NULL)) {
+            /* To avoid unnecessary locking, bond_may_recirc() is first
+             * called outside of the 'rwlock'. After acquiring the lock,
+             * bond_update_post_recirc_rules() will check again to make
+             * sure bond configuration has not been changed.
+             *
+             * In case recirculation is not actually in use, 'xr.recirc_id'
+             * will be set to '0', Since a valid 'recirc_id' can
+             * not be zero.  */
+            bond_update_post_recirc_rules(out_xbundle->bond,
+                                          &xr.recirc_id,
+                                          &xr.hash_basis);
+            if (xr.recirc_id) {
+                /* Use recirculation instead of output. */
+                use_recirc = true;
                 xr.hash_alg = OVS_HASH_ALG_L4;
-                bond_update_post_recirc_rules(out_xbundle->bond, false);
-
                 /* Recirculation does not require unmasking hash fields. */
                 wc = NULL;
             }