diff mbox series

[ovs-dev,v2] ofproto-dpif-rid: Fix duplicate entries.

Message ID 20240430093609.1739186-1-wushaohua@chinatelecom.cn
State Changes Requested
Headers show
Series [ovs-dev,v2] ofproto-dpif-rid: Fix duplicate entries. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/intel-ovs-compilation success test: success

Commit Message

wushaohua@chinatelecom.cn April 30, 2024, 9:36 a.m. UTC
From: Shaohua Wu <wushaohua@chinatelecom.cn>

In scenarios with multiple PMDs, there may be
simultaneous requests for recirc_id from multiple
PMD threads.In recirc_alloc_id_ctx, we first check
if there is a duplicate entry in the metadata_map
for the same frozen_state field. If successful,
we directly retrieve the recirc_id. If unsuccessful,
we create a new recirc_node and insert it into
id_map and metadata_map. There is no locking mechanism
to prevent the possibility of two threads with the same
state simultaneously inserting, meaning their IDs are
different, but their frozen_states are the same.

trace log:
static struct recirc_id_node *
recirc_alloc_id__(const struct frozen_state *state, uint32_t hash)
{
    ovs_assert(state->action_set_len <= state->ofpacts_len);

    struct recirc_id_node *node = xzalloc(sizeof *node);

    node->hash = hash;
    ovs_refcount_init(&node->refcount);
    node->is_hotupgrade = false;
    frozen_state_clone(CONST_CAST(struct frozen_state *, &node->state), state);
    struct recirc_id_node *hash_recirc_node = recirc_find_equal(&node->state, state);
    if (hash_recirc_node) {
        VLOG_INFO("wsh:hash equal:hash_recirc_node %p id:%u, hash_recirc_node hash:%u,node %p hash:%u\n",
                   hash_recirc_node, hash_recirc_node->id, hash_recirc_node->hash, node, node->hash);
    }
    ovs_mutex_lock(&mutex);
    ...
}

Log recording:
2024-04-27T12:28:47.973Z|00006|ofproto_dpif_rid(pmd-c08/id:13)|INFO|wsh:hash equal:
hash_recirc_node 0x7fb29c0276a0 id:27,hash_recirc_node hash:3224122528,node 0x7fb2900194d0 hash:3224122528
2024-04-27T12:28:47.973Z|00009|ofproto_dpif_rid(pmd-c02/id:15)|INFO|wsh:hash equal:
hash_recirc_node 0x7fb29c0276a0 id:27,hash_recirc_node hash:3224122528,node 0x7fb288025270 hash:3224122528
2024-04-27T12:28:47.973Z|00006|ofproto_dpif_rid(pmd-c03/id:14)|INFO|wsh:hash equal:
hash_recirc_node 0x7fb29c0276a0 id:27,hash_recirc_node hash:3224122528,node 0x7fb29401d4e0 hash:3224122528
2024-04-27T12:28:47.973Z|00004|ofproto_dpif_rid(pmd-c09/id:12)|INFO|node->id:28,hash:4019648042,table_id:75
2024-04-27T12:28:47.973Z|00007|ofproto_dpif_rid(pmd-c08/id:13)|INFO|wsh:hash equal:
hash_recirc_node 0x7fb29c028d40 id:28,hash_recirc_node hash:4019648042,node 0x7fb29001ac30 hash:4019648042
2024-04-27T12:28:48.065Z|00005|ofproto_dpif_rid(pmd-c09/id:12)|INFO|node->id:29,hash:3800776147,table_id:30
2024-04-27T12:28:48.101Z|00007|ofproto_dpif_rid(pmd-c03/id:14)|INFO|node->id:30,hash:1580334976,table_id:75
Signed-off-by: Shaohua Wu <wushaohua@chinatelecom.cn>

---
v1->v2:modify log recording , add trace code.
---
 ofproto/ofproto-dpif-rid.c | 7 +++++++
 1 file changed, 7 insertions(+)

Comments

'Simon Horman' May 1, 2024, 12:36 p.m. UTC | #1
On Tue, Apr 30, 2024 at 05:36:09PM +0800, wushaohua@chinatelecom.cn wrote:
> From: Shaohua Wu <wushaohua@chinatelecom.cn>
> 
> In scenarios with multiple PMDs, there may be
> simultaneous requests for recirc_id from multiple
> PMD threads.In recirc_alloc_id_ctx, we first check
> if there is a duplicate entry in the metadata_map
> for the same frozen_state field. If successful,
> we directly retrieve the recirc_id. If unsuccessful,
> we create a new recirc_node and insert it into
> id_map and metadata_map. There is no locking mechanism
> to prevent the possibility of two threads with the same
> state simultaneously inserting, meaning their IDs are
> different, but their frozen_states are the same.

Hi Shaohua Wu,

It seems that recirc_alloc_id_ctx() already contains
logic to check for an existing entry found
by recirc_ref_equal(). Is the problem that this is
not inside the critical section protected by mutex?.

If so:
1. The check in recirc_alloc_id_ctx should be removed.
2. What are the implications for recirc_alloc_id__(),
   the other caller of recirc_alloc_id__()?
   Does it also have the problem you describe?
   Although I don't think there are any correctness issues,
   could the logic added to recirc_alloc_id__() have some
   impact on recirc_alloc_id(), such as performance?

> 
> trace log:
> static struct recirc_id_node *
> recirc_alloc_id__(const struct frozen_state *state, uint32_t hash)
> {
>     ovs_assert(state->action_set_len <= state->ofpacts_len);
> 
>     struct recirc_id_node *node = xzalloc(sizeof *node);
> 
>     node->hash = hash;
>     ovs_refcount_init(&node->refcount);
>     node->is_hotupgrade = false;
>     frozen_state_clone(CONST_CAST(struct frozen_state *, &node->state), state);
>     struct recirc_id_node *hash_recirc_node = recirc_find_equal(&node->state, state);
>     if (hash_recirc_node) {
>         VLOG_INFO("wsh:hash equal:hash_recirc_node %p id:%u, hash_recirc_node hash:%u,node %p hash:%u\n",
>                    hash_recirc_node, hash_recirc_node->id, hash_recirc_node->hash, node, node->hash);
>     }
>     ovs_mutex_lock(&mutex);
>     ...
> }
> 
> Log recording:
> 2024-04-27T12:28:47.973Z|00006|ofproto_dpif_rid(pmd-c08/id:13)|INFO|wsh:hash equal:
> hash_recirc_node 0x7fb29c0276a0 id:27,hash_recirc_node hash:3224122528,node 0x7fb2900194d0 hash:3224122528
> 2024-04-27T12:28:47.973Z|00009|ofproto_dpif_rid(pmd-c02/id:15)|INFO|wsh:hash equal:
> hash_recirc_node 0x7fb29c0276a0 id:27,hash_recirc_node hash:3224122528,node 0x7fb288025270 hash:3224122528
> 2024-04-27T12:28:47.973Z|00006|ofproto_dpif_rid(pmd-c03/id:14)|INFO|wsh:hash equal:
> hash_recirc_node 0x7fb29c0276a0 id:27,hash_recirc_node hash:3224122528,node 0x7fb29401d4e0 hash:3224122528
> 2024-04-27T12:28:47.973Z|00004|ofproto_dpif_rid(pmd-c09/id:12)|INFO|node->id:28,hash:4019648042,table_id:75
> 2024-04-27T12:28:47.973Z|00007|ofproto_dpif_rid(pmd-c08/id:13)|INFO|wsh:hash equal:
> hash_recirc_node 0x7fb29c028d40 id:28,hash_recirc_node hash:4019648042,node 0x7fb29001ac30 hash:4019648042
> 2024-04-27T12:28:48.065Z|00005|ofproto_dpif_rid(pmd-c09/id:12)|INFO|node->id:29,hash:3800776147,table_id:30
> 2024-04-27T12:28:48.101Z|00007|ofproto_dpif_rid(pmd-c03/id:14)|INFO|node->id:30,hash:1580334976,table_id:75

nit: blank line before Signed-off-by line please.

> Signed-off-by: Shaohua Wu <wushaohua@chinatelecom.cn>
> 
> ---
> v1->v2:modify log recording , add trace code.

If you post a v3, please do so in a new thread.
Thanks!

...
diff mbox series

Patch

diff --git a/ofproto/ofproto-dpif-rid.c b/ofproto/ofproto-dpif-rid.c
index f01468025..651db6c53 100644
--- a/ofproto/ofproto-dpif-rid.c
+++ b/ofproto/ofproto-dpif-rid.c
@@ -234,6 +234,7 @@  recirc_alloc_id__(const struct frozen_state *state, uint32_t hash)
 {
     ovs_assert(state->action_set_len <= state->ofpacts_len);
 
+    struct recirc_id_node *hash_recirc_node = NULL;
     struct recirc_id_node *node = xzalloc(sizeof *node);
 
     node->hash = hash;
@@ -241,6 +242,12 @@  recirc_alloc_id__(const struct frozen_state *state, uint32_t hash)
     frozen_state_clone(CONST_CAST(struct frozen_state *, &node->state), state);
 
     ovs_mutex_lock(&mutex);
+    hash_recirc_node = recirc_ref_equal(&node->state, node->hash);
+    if (hash_recirc_node) {
+        ovs_mutex_unlock(&mutex);
+        recirc_id_node_free(node);
+        return hash_recirc_node;
+    }
     for (;;) {
         /* Claim the next ID.  The ID space should be sparse enough for the
            allocation to succeed at the first try.  We do skip the first