
[08/10] Rework ram block hash

Message ID 1429545445-28216-9-git-send-email-dgilbert@redhat.com
State New

Commit Message

Dr. David Alan Gilbert April 20, 2015, 3:57 p.m. UTC
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>

RDMA uses a hash from block offset->RAMBlock; this isn't needed
on the destination, and now that the destination sorts the ramblock
list, it is harder to maintain.

Split the hash so that it's only generated on the source.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
 migration/rdma.c | 32 ++++++++++++++++++++------------
 1 file changed, 20 insertions(+), 12 deletions(-)
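
For readers following the thread: on the source side the blockmap is a
GLib hash keyed by a block's starting offset, and the RDMA write path
uses it to turn a guest offset into its RDMALocalBlock. A minimal,
self-contained sketch of that pattern - stand-in types and sample
blocks, not the actual QEMU code:

#include <glib.h>
#include <stdint.h>
#include <stdio.h>

/* Stripped-down stand-in for QEMU's RDMALocalBlock. */
typedef struct {
    uint64_t offset;     /* start of the block in ram_addr_t space */
    uint64_t length;
    const char *name;
} LocalBlock;

int main(void)
{
    LocalBlock blocks[] = {
        { 0x00000000, 0x8000000, "pc.ram"  },
        { 0x08000000, 0x0020000, "pc.bios" },
    };

    /* Source only: build the offset -> block hash, as the patch now
     * does in qemu_rdma_source_init(). */
    GHashTable *blockmap = g_hash_table_new(g_direct_hash, g_direct_equal);
    for (unsigned i = 0; i < G_N_ELEMENTS(blocks); i++) {
        g_hash_table_insert(blockmap,
                            (void *)(uintptr_t)blocks[i].offset,
                            &blocks[i]);
    }

    /* Resolve a block from its offset, as the write path does. */
    LocalBlock *b = g_hash_table_lookup(blockmap,
                                        (void *)(uintptr_t)0x08000000);
    printf("found: %s\n", b ? b->name : "(none)");

    g_hash_table_destroy(blockmap);
    return 0;
}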

Comments

mrhines@linux.vnet.ibm.com May 19, 2015, 6:49 p.m. UTC | #1
On 04/20/2015 10:57 AM, Dr. David Alan Gilbert (git) wrote:
> From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
>
> RDMA uses a hash from block offset->RAMBlock; this isn't needed
> on the destination, and now that the destination sorts the ramblock
> list, it is harder to maintain.

Destination sorts the ramblock list? Is the patchset out-of-order?
I didn't see that yet..... Why is it sorted?

I would like to keep the ramblock list directly addressable by hash
on both sides, because, as I mentioned earlier, we want as much
flexibility in registering RAMBlock memory as possible by being
able to add or delete arbitrary blocks in the list at any time during
a migration.

I will try to get the patchset that allows anyone to register memory
for transfer out as soon as I can.

Dr. David Alan Gilbert May 19, 2015, 6:55 p.m. UTC | #2
* Michael R. Hines (mrhines@linux.vnet.ibm.com) wrote:
> On 04/20/2015 10:57 AM, Dr. David Alan Gilbert (git) wrote:
> >From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> >
> >RDMA uses a hash from block offset->RAMBlock; this isn't needed
> >on the destination, and now that the destination sorts the ramblock
> >list, it is harder to maintain.
> 
> Destination sorts the ramblock list? Is the patchset out-of-order?
> I didn't see that yet..... Why is it sorted?

It's the next patch in the series - please see that one.
Since I use an index into the list, keeping it sorted made it the
easiest thing to index on.
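
(Illustrative sketch of that index-based scheme - hypothetical helper
name, not code from the series: with the destination's blocks in a
flat array, a control message can carry the array index and the
lookup becomes a bounds-checked subscript, with no hash needed.)

#include <stdint.h>
#include <stdio.h>

typedef struct {
    uint64_t offset;
    uint64_t length;
    const char *name;
} LocalBlock;

/* Hypothetical destination-side lookup: the source names a block by
 * its position in the (sorted) array, so no offset->block hash is
 * needed; a bad index from the wire is simply rejected. */
static LocalBlock *block_by_index(LocalBlock *blocks, int nb_blocks,
                                  int index)
{
    if (index < 0 || index >= nb_blocks) {
        return NULL;
    }
    return &blocks[index];
}

int main(void)
{
    LocalBlock blocks[] = {
        { 0x00000000, 0x8000000, "pc.ram"  },
        { 0x08000000, 0x0020000, "pc.bios" },
    };
    LocalBlock *b = block_by_index(blocks, 2, 1);
    printf("index 1 -> %s\n", b ? b->name : "(none)");
    return 0;
}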

> I would like to keep the ramblock list directly addressable by hash
> on both sides, because, as I mentioned earlier, we want as much
> flexibility in registering RAMBlock memory as possible by being
> able to add or delete arbitrary blocks in the list at any time during
> a migration.
> 
> I will try to get the patchset that allows anyone to register memory
> for transfer out as soon as I can.

Hmm OK, I think I can rework that to regenerate the hash; it's a little
difficult without knowing how you're intending to use it.

Dave

--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
mrhines@linux.vnet.ibm.com May 19, 2015, 7:02 p.m. UTC | #3
On 05/19/2015 01:55 PM, Dr. David Alan Gilbert wrote:
>> I would like to keep the ramblock list directly addressable by hash
>> on both sides, because, as I mentioned earlier, we want as much
>> flexibility in registering RAMBlock memory as possible by being
>> able to add or delete arbitrary blocks in the list at any time during
>> a migration.
>>
>> I will try to get the patchset that allows anyone to register memory
>> for transfer out as soon as I can.
> Hmm OK, I think I can rework that to regenerate the hash; it's a little
> difficult without knowing how you're intending to use it.
>
> Dave

We can use the RAMBlock name as a key to the hash, right?

I see a "future" where storage replication also uses RDMA
(not that I'm volunteering to write it), but I don't want to
lose the ability for arbitrary QEMU callers to
register/unregister ramblocks dynamically.

- Michael
Dr. David Alan Gilbert May 19, 2015, 7:07 p.m. UTC | #4
* Michael R. Hines (mrhines@linux.vnet.ibm.com) wrote:
> On 05/19/2015 01:55 PM, Dr. David Alan Gilbert wrote:
> >>I would like to keep the ramblock list directly addressable by hash
> >>on both sides, because, as I mentioned earlier, we want as much
> >>flexibility in registering RAMBlock memory as possible by being
> >>able to add or delete arbitrary blocks in the list at any time during
> >>a migration.
> >>
> >>I will try to get the patchset that allows anyone to register memory
> >>for transfer out as soon as I can.
> >Hmm OK, I think I can rework that to regenerate the hash; it's a little
> >difficult without knowing how you're intending to use it.
> >
> >Dave
> 
> We can use the RAMBlock name as a key to the hash, right?

Hmm, OK, I need to look at that; I guess some of those pointers are
constant once received.
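
(A rough sketch of the name-keyed idea, using GLib's string hashing;
the table does not copy its keys, so the name strings must stay live
for the table's lifetime - which is the "constant once received"
question. Stand-in types and sample blocks, not QEMU code.)

#include <glib.h>
#include <stdio.h>

typedef struct {
    const char *name;
    unsigned long offset;
} LocalBlock;

int main(void)
{
    LocalBlock blocks[] = {
        { "pc.ram",  0x00000000 },
        { "pc.bios", 0x08000000 },
    };

    /* g_str_hash/g_str_equal compare key *contents*, so a lookup
     * string may be a different pointer from the stored key.  Keys
     * are not copied: blocks[i].name must outlive the table. */
    GHashTable *blockmap = g_hash_table_new(g_str_hash, g_str_equal);
    for (unsigned i = 0; i < G_N_ELEMENTS(blocks); i++) {
        g_hash_table_insert(blockmap, (gpointer)blocks[i].name,
                            &blocks[i]);
    }

    LocalBlock *b = g_hash_table_lookup(blockmap, "pc.bios");
    printf("pc.bios at 0x%lx\n", b ? b->offset : 0UL);

    g_hash_table_destroy(blockmap);
    return 0;
}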

> I see a "future" where storage replication also uses RDMA
> (not that I'm volunteering to write it), but I don't want to
> lose the ability for arbitrary QEMU callers to
> register/unregister ramblocks dynamically.

Yes, I'm going to try and wire RDMA up for COLO like you did
for the microcheckpoint work, so I can see the need to keep
it more general.

Let me go away and think about it; I think I can probably
keep the destination-side hash, but it was just easier
to do it like this since it was unused.
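
(One hedged sketch of what regenerating the hash might look like -
hypothetical helper, not from this series: tear down and rebuild the
offset-keyed table whenever the block array is sorted or reallocated,
since the stored values are pointers into that array.)

#include <glib.h>
#include <stdint.h>

typedef struct {
    uint64_t offset;
    const char *name;
} LocalBlock;

/* Hypothetical: rebuild the offset->block map from scratch.  Any
 * operation that moves the array elements (sorting, or the
 * g_malloc0 + memcpy growth in rdma_add_block) invalidates the old
 * table's values, so it is discarded and repopulated. */
static GHashTable *rebuild_blockmap(GHashTable *old,
                                    LocalBlock *blocks, int nb_blocks)
{
    GHashTable *map = g_hash_table_new(g_direct_hash, g_direct_equal);

    if (old) {
        g_hash_table_destroy(old);
    }
    for (int i = 0; i < nb_blocks; i++) {
        g_hash_table_insert(map,
                            (void *)(uintptr_t)blocks[i].offset,
                            &blocks[i]);
    }
    return map;
}

int main(void)
{
    LocalBlock blocks[] = {
        { 0x08000000, "pc.bios" },
        { 0x00000000, "pc.ram"  },
    };
    GHashTable *map = rebuild_blockmap(NULL, blocks, 2);
    /* ...sort or grow blocks[], then rebuild: */
    map = rebuild_blockmap(map, blocks, 2);
    g_hash_table_destroy(map);
    return 0;
}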

If you do have code lying around that uses it, please post
it and cc me, and I can try to keep it so it looks like it might
work.

Dave

--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK

Patch

diff --git a/migration/rdma.c b/migration/rdma.c
index fe3b76e..185817e 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -533,23 +533,22 @@  static int rdma_add_block(RDMAContext *rdma, const char *block_name,
                          ram_addr_t block_offset, uint64_t length)
 {
     RDMALocalBlocks *local = &rdma->local_ram_blocks;
-    RDMALocalBlock *block = g_hash_table_lookup(rdma->blockmap,
-        (void *)(uintptr_t)block_offset);
+    RDMALocalBlock *block;
     RDMALocalBlock *old = local->block;
 
-    assert(block == NULL);
-
     local->block = g_malloc0(sizeof(RDMALocalBlock) * (local->nb_blocks + 1));
 
     if (local->nb_blocks) {
         int x;
 
-        for (x = 0; x < local->nb_blocks; x++) {
-            g_hash_table_remove(rdma->blockmap,
-                                (void *)(uintptr_t)old[x].offset);
-            g_hash_table_insert(rdma->blockmap,
-                                (void *)(uintptr_t)old[x].offset,
-                                &local->block[x]);
+        if (rdma->blockmap) {
+            for (x = 0; x < local->nb_blocks; x++) {
+                g_hash_table_remove(rdma->blockmap,
+                                    (void *)(uintptr_t)old[x].offset);
+                g_hash_table_insert(rdma->blockmap,
+                                    (void *)(uintptr_t)old[x].offset,
+                                    &local->block[x]);
+            }
         }
         memcpy(local->block, old, sizeof(RDMALocalBlock) * local->nb_blocks);
         g_free(old);
@@ -571,7 +570,9 @@  static int rdma_add_block(RDMAContext *rdma, const char *block_name,
 
     block->is_ram_block = local->init ? false : true;
 
-    g_hash_table_insert(rdma->blockmap, (void *) block_offset, block);
+    if (rdma->blockmap) {
+        g_hash_table_insert(rdma->blockmap, (void *) block_offset, block);
+    }
 
     trace_rdma_add_block(block_name, local->nb_blocks,
                          (uintptr_t) block->local_host_addr,
@@ -607,7 +608,6 @@  static int qemu_rdma_init_ram_blocks(RDMAContext *rdma)
     RDMALocalBlocks *local = &rdma->local_ram_blocks;
 
     assert(rdma->blockmap == NULL);
-    rdma->blockmap = g_hash_table_new(g_direct_hash, g_direct_equal);
     memset(local, 0, sizeof *local);
     qemu_ram_foreach_block(qemu_rdma_init_one_block, rdma);
     trace_qemu_rdma_init_ram_blocks(local->nb_blocks);
@@ -2248,6 +2248,14 @@  static int qemu_rdma_source_init(RDMAContext *rdma, Error **errp, bool pin_all)
         goto err_rdma_source_init;
     }
 
+    /* Build the hash that maps from offset to RAMBlock */
+    rdma->blockmap = g_hash_table_new(g_direct_hash, g_direct_equal);
+    for (idx = 0; idx < rdma->local_ram_blocks.nb_blocks; idx++) {
+        g_hash_table_insert(rdma->blockmap,
+                (void *)(uintptr_t)rdma->local_ram_blocks.block[idx].offset,
+                &rdma->local_ram_blocks.block[idx]);
+    }
+
     for (idx = 0; idx < RDMA_WRID_MAX; idx++) {
         ret = qemu_rdma_reg_control(rdma, idx);
         if (ret) {