Message ID | 1434043048-4444-7-git-send-email-dgilbert@redhat.com |
---|---|
State | New |
Headers | show |
On 06/11/2015 12:17 PM, Dr. David Alan Gilbert (git) wrote: > From: "Dr. David Alan Gilbert" <dgilbert@redhat.com> > > The 'offset' field in RDMACompress and 'current_addr' field > in RDMARegister are commented as being offsets within a particular > RAMBlock, however they appear to actually be offsets within the > ram_addr_t space. > > The code currently assumes that the offsets on the source/destination > match, this change removes the need for the assumption for these > structures by translating the addresses into the ram_addr_t space of > the destination host. > > Note: An alternative would be to change the fields to actually > take the data they're commented for; this would potentially be > simpler but would break stream compatibility for those cases > that currently work. > > Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> > --- > migration/rdma.c | 31 ++++++++++++++++++++++++------- > 1 file changed, 24 insertions(+), 7 deletions(-) > > diff --git a/migration/rdma.c b/migration/rdma.c > index 9532461..cb66721 100644 > --- a/migration/rdma.c > +++ b/migration/rdma.c > @@ -411,7 +411,7 @@ static void network_to_control(RDMAControlHeader *control) > */ > typedef struct QEMU_PACKED { > union QEMU_PACKED { > - uint64_t current_addr; /* offset into the ramblock of the chunk */ > + uint64_t current_addr; /* offset into the ram_addr_t space */ > uint64_t chunk; /* chunk to lookup if unregistering */ > } key; > uint32_t current_index; /* which ramblock the chunk belongs to */ > @@ -419,8 +419,19 @@ typedef struct QEMU_PACKED { > uint64_t chunks; /* how many sequential chunks to register */ > } RDMARegister; > > -static void register_to_network(RDMARegister *reg) > +static void register_to_network(RDMAContext *rdma, RDMARegister *reg) > { > + RDMALocalBlock *local_block; > + local_block = &rdma->local_ram_blocks.block[reg->current_index]; > + > + if (local_block->is_ram_block) { > + /* > + * current_addr as passed in is an address in the local ram_addr_t > 
+ space, we need to translate this for the destination > + */ > + reg->key.current_addr -= local_block->offset; > + reg->key.current_addr += rdma->dest_blocks[reg->current_index].offset; > + } > reg->key.current_addr = htonll(reg->key.current_addr); > reg->current_index = htonl(reg->current_index); > reg->chunks = htonll(reg->chunks); > @@ -436,13 +447,19 @@ static void network_to_register(RDMARegister *reg) > typedef struct QEMU_PACKED { > uint32_t value; /* if zero, we will madvise() */ > uint32_t block_idx; /* which ram block index */ > - uint64_t offset; /* where in the remote ramblock this chunk */ > + uint64_t offset; /* Address in remote ram_addr_t space */ > uint64_t length; /* length of the chunk */ > } RDMACompress; > > -static void compress_to_network(RDMACompress *comp) > +static void compress_to_network(RDMAContext *rdma, RDMACompress *comp) > { > comp->value = htonl(comp->value); > + /* > + * comp->offset as passed in is an address in the local ram_addr_t > + * space, we need to translate this for the destination > + */ > + comp->offset -= rdma->local_ram_blocks.block[comp->block_idx].offset; > + comp->offset += rdma->dest_blocks[comp->block_idx].offset; > comp->block_idx = htonl(comp->block_idx); > comp->offset = htonll(comp->offset); > comp->length = htonll(comp->length); So, why add the destination block's offset on the source side just for it to be re-adjusted again when it gets to the destination side? Can you just stop at this: + reg->key.current_addr -= local_block->offset; Without this: + reg->key.current_addr += rdma->dest_blocks[reg->current_index].offset; ... on the source, followed by this on the destination: + comp->offset -= rdma->local_ram_blocks.block[comp->block_idx].offset; Without this: + comp->offset += rdma->dest_blocks[comp->block_idx].offset; Did I follow correctly? 
> @@ -1288,7 +1305,7 @@ static int qemu_rdma_unregister_waiting(RDMAContext *rdma) > rdma->total_registrations--; > > reg.key.chunk = chunk; > - register_to_network(&reg); > + register_to_network(rdma, &reg); > ret = qemu_rdma_exchange_send(rdma, &head, (uint8_t *) &reg, > &resp, NULL, NULL); > if (ret < 0) { > @@ -1909,7 +1926,7 @@ retry: > trace_qemu_rdma_write_one_zero(chunk, sge.length, > current_index, current_addr); > > - compress_to_network(&comp); > + compress_to_network(rdma, &comp); > ret = qemu_rdma_exchange_send(rdma, &head, > (uint8_t *) &comp, NULL, NULL, NULL); > > @@ -1936,7 +1953,7 @@ retry: > trace_qemu_rdma_write_one_sendreg(chunk, sge.length, current_index, > current_addr); > > - register_to_network(&reg); > + register_to_network(rdma, &reg); > ret = qemu_rdma_exchange_send(rdma, &head, (uint8_t *) &reg, > &resp, &reg_result_idx, NULL); > if (ret < 0) {
* Michael R. Hines (mrhines@linux.vnet.ibm.com) wrote: > On 06/11/2015 12:17 PM, Dr. David Alan Gilbert (git) wrote: > >From: "Dr. David Alan Gilbert" <dgilbert@redhat.com> > > > >The 'offset' field in RDMACompress and 'current_addr' field > >in RDMARegister are commented as being offsets within a particular > >RAMBlock, however they appear to actually be offsets within the > >ram_addr_t space. > > > >The code currently assumes that the offsets on the source/destination > >match, this change removes the need for the assumption for these > >structures by translating the addresses into the ram_addr_t space of > >the destination host. > > > >Note: An alternative would be to change the fields to actually > >take the data they're commented for; this would potentially be > >simpler but would break stream compatibility for those cases > >that currently work. > > > >Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> > >--- > > migration/rdma.c | 31 ++++++++++++++++++++++++------- > > 1 file changed, 24 insertions(+), 7 deletions(-) > > > >diff --git a/migration/rdma.c b/migration/rdma.c > >index 9532461..cb66721 100644 > >--- a/migration/rdma.c > >+++ b/migration/rdma.c > >@@ -411,7 +411,7 @@ static void network_to_control(RDMAControlHeader *control) > > */ > > typedef struct QEMU_PACKED { > > union QEMU_PACKED { > >- uint64_t current_addr; /* offset into the ramblock of the chunk */ > >+ uint64_t current_addr; /* offset into the ram_addr_t space */ > > uint64_t chunk; /* chunk to lookup if unregistering */ > > } key; > > uint32_t current_index; /* which ramblock the chunk belongs to */ > >@@ -419,8 +419,19 @@ typedef struct QEMU_PACKED { > > uint64_t chunks; /* how many sequential chunks to register */ > > } RDMARegister; > > > >-static void register_to_network(RDMARegister *reg) > >+static void register_to_network(RDMAContext *rdma, RDMARegister *reg) > > { > >+ RDMALocalBlock *local_block; > >+ local_block = &rdma->local_ram_blocks.block[reg->current_index]; > 
>+ > >+ if (local_block->is_ram_block) { > >+ /* > >+ * current_addr as passed in is an address in the local ram_addr_t > >+ * space, we need to translate this for the destination > >+ */ > >+ reg->key.current_addr -= local_block->offset; > >+ reg->key.current_addr += rdma->dest_blocks[reg->current_index].offset; > >+ } > > reg->key.current_addr = htonll(reg->key.current_addr); > > reg->current_index = htonl(reg->current_index); > > reg->chunks = htonll(reg->chunks); > >@@ -436,13 +447,19 @@ static void network_to_register(RDMARegister *reg) > > typedef struct QEMU_PACKED { > > uint32_t value; /* if zero, we will madvise() */ > > uint32_t block_idx; /* which ram block index */ > >- uint64_t offset; /* where in the remote ramblock this chunk */ > >+ uint64_t offset; /* Address in remote ram_addr_t space */ > > uint64_t length; /* length of the chunk */ > > } RDMACompress; > > > >-static void compress_to_network(RDMACompress *comp) > >+static void compress_to_network(RDMAContext *rdma, RDMACompress *comp) > > { > > comp->value = htonl(comp->value); > >+ /* > >+ * comp->offset as passed in is an address in the local ram_addr_t > >+ * space, we need to translate this for the destination > >+ */ > >+ comp->offset -= rdma->local_ram_blocks.block[comp->block_idx].offset; > >+ comp->offset += rdma->dest_blocks[comp->block_idx].offset; > > comp->block_idx = htonl(comp->block_idx); > > comp->offset = htonll(comp->offset); > > comp->length = htonll(comp->length); > > So, why add the destination block's offset on the source side > just for it to be re-adjusted again when it gets to the destination side? > > Can you just stop at this: > > + reg->key.current_addr -= local_block->offset; > > Without this: > > + reg->key.current_addr += > rdma->dest_blocks[reg->current_index].offset; > > ... 
on the source, followed by this on the destination: > > + comp->offset -= rdma->local_ram_blocks.block[comp->block_idx].offset; > > Without this: > > + comp->offset += rdma->dest_blocks[comp->block_idx].offset; > > Did I follow correctly? Aren't both of those conversions happening on the source? Anyway, I think what you're saying is that we change the value sent over the network to be an offset within the block instead of an offset in the whole ram_addr_t space (i.e. that's what happens if you don't add back on the dest_blocks[].offset). As I commented in the commit message, that would work but it would break compatibility with existing RDMA migrations since the offset field would now have a different meaning. Dave > > >@@ -1288,7 +1305,7 @@ static int qemu_rdma_unregister_waiting(RDMAContext *rdma) > > rdma->total_registrations--; > > > > reg.key.chunk = chunk; > >- register_to_network(&reg); > >+ register_to_network(rdma, &reg); > > ret = qemu_rdma_exchange_send(rdma, &head, (uint8_t *) &reg, > > &resp, NULL, NULL); > > if (ret < 0) { > >@@ -1909,7 +1926,7 @@ retry: > > trace_qemu_rdma_write_one_zero(chunk, sge.length, > > current_index, current_addr); > > > >- compress_to_network(&comp); > >+ compress_to_network(rdma, &comp); > > ret = qemu_rdma_exchange_send(rdma, &head, > > (uint8_t *) &comp, NULL, NULL, NULL); > > > >@@ -1936,7 +1953,7 @@ retry: > > trace_qemu_rdma_write_one_sendreg(chunk, sge.length, current_index, > > current_addr); > > > >- register_to_network(&reg); > >+ register_to_network(rdma, &reg); > > ret = qemu_rdma_exchange_send(rdma, &head, (uint8_t *) &reg, > > &resp, &reg_result_idx, NULL); > > if (ret < 0) { > -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On 06/11/2015 01:58 PM, Dr. David Alan Gilbert wrote: > * Michael R. Hines (mrhines@linux.vnet.ibm.com) wrote: >> On 06/11/2015 12:17 PM, Dr. David Alan Gilbert (git) wrote: >>> From: "Dr. David Alan Gilbert" <dgilbert@redhat.com> >>> >>> The 'offset' field in RDMACompress and 'current_addr' field >>> in RDMARegister are commented as being offsets within a particular >>> RAMBlock, however they appear to actually be offsets within the >>> ram_addr_t space. >>> >>> The code currently assumes that the offsets on the source/destination >>> match, this change removes the need for the assumption for these >>> structures by translating the addresses into the ram_addr_t space of >>> the destination host. >>> >>> Note: An alternative would be to change the fields to actually >>> take the data they're commented for; this would potentially be >>> simpler but would break stream compatibility for those cases >>> that currently work. >>> >>> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> >>> --- >>> migration/rdma.c | 31 ++++++++++++++++++++++++------- >>> 1 file changed, 24 insertions(+), 7 deletions(-) >>> >>> diff --git a/migration/rdma.c b/migration/rdma.c >>> index 9532461..cb66721 100644 >>> --- a/migration/rdma.c >>> +++ b/migration/rdma.c >>> @@ -411,7 +411,7 @@ static void network_to_control(RDMAControlHeader *control) >>> */ >>> typedef struct QEMU_PACKED { >>> union QEMU_PACKED { >>> - uint64_t current_addr; /* offset into the ramblock of the chunk */ >>> + uint64_t current_addr; /* offset into the ram_addr_t space */ >>> uint64_t chunk; /* chunk to lookup if unregistering */ >>> } key; >>> uint32_t current_index; /* which ramblock the chunk belongs to */ >>> @@ -419,8 +419,19 @@ typedef struct QEMU_PACKED { >>> uint64_t chunks; /* how many sequential chunks to register */ >>> } RDMARegister; >>> >>> -static void register_to_network(RDMARegister *reg) >>> +static void register_to_network(RDMAContext *rdma, RDMARegister *reg) >>> { >>> + RDMALocalBlock 
*local_block; >>> + local_block = &rdma->local_ram_blocks.block[reg->current_index]; >>> + >>> + if (local_block->is_ram_block) { >>> + /* >>> + * current_addr as passed in is an address in the local ram_addr_t >>> + * space, we need to translate this for the destination >>> + */ >>> + reg->key.current_addr -= local_block->offset; >>> + reg->key.current_addr += rdma->dest_blocks[reg->current_index].offset; >>> + } >>> reg->key.current_addr = htonll(reg->key.current_addr); >>> reg->current_index = htonl(reg->current_index); >>> reg->chunks = htonll(reg->chunks); >>> @@ -436,13 +447,19 @@ static void network_to_register(RDMARegister *reg) >>> typedef struct QEMU_PACKED { >>> uint32_t value; /* if zero, we will madvise() */ >>> uint32_t block_idx; /* which ram block index */ >>> - uint64_t offset; /* where in the remote ramblock this chunk */ >>> + uint64_t offset; /* Address in remote ram_addr_t space */ >>> uint64_t length; /* length of the chunk */ >>> } RDMACompress; >>> >>> -static void compress_to_network(RDMACompress *comp) >>> +static void compress_to_network(RDMAContext *rdma, RDMACompress *comp) >>> { >>> comp->value = htonl(comp->value); >>> + /* >>> + * comp->offset as passed in is an address in the local ram_addr_t >>> + * space, we need to translate this for the destination >>> + */ >>> + comp->offset -= rdma->local_ram_blocks.block[comp->block_idx].offset; >>> + comp->offset += rdma->dest_blocks[comp->block_idx].offset; >>> comp->block_idx = htonl(comp->block_idx); >>> comp->offset = htonll(comp->offset); >>> comp->length = htonll(comp->length); >> So, why add the destination block's offset on the source side >> just for it to be re-adjusted again when it gets to the destination side? >> >> Can you just stop at this: >> >> + reg->key.current_addr -= local_block->offset; >> >> Without this: >> >> + reg->key.current_addr += >> rdma->dest_blocks[reg->current_index].offset; >> >> ... 
on the source, followed by this on the destination: >> >> + comp->offset -= rdma->local_ram_blocks.block[comp->block_idx].offset; >> >> Without this: >> >> + comp->offset += rdma->dest_blocks[comp->block_idx].offset; >> >> Did I follow correctly? > Aren't both of those conversions happening on the source? > Anyway, I think what you're saying is that we change the value sent over > the network to be an offset within the block instead of an offset in > the whole ram_addr_t space (i.e. that's what happens if you don't > add back on the dest_blocks[].offset). Yes, right. Can you skip adding/subtracting the local block offset on each side? - Michael
* Michael R. Hines (mrhines@linux.vnet.ibm.com) wrote: > On 06/11/2015 01:58 PM, Dr. David Alan Gilbert wrote: > >* Michael R. Hines (mrhines@linux.vnet.ibm.com) wrote: > >>On 06/11/2015 12:17 PM, Dr. David Alan Gilbert (git) wrote: > >>>From: "Dr. David Alan Gilbert" <dgilbert@redhat.com> > >>> > >>>The 'offset' field in RDMACompress and 'current_addr' field > >>>in RDMARegister are commented as being offsets within a particular > >>>RAMBlock, however they appear to actually be offsets within the > >>>ram_addr_t space. > >>> > >>>The code currently assumes that the offsets on the source/destination > >>>match, this change removes the need for the assumption for these > >>>structures by translating the addresses into the ram_addr_t space of > >>>the destination host. > >>> > >>>Note: An alternative would be to change the fields to actually > >>>take the data they're commented for; this would potentially be > >>>simpler but would break stream compatibility for those cases > >>>that currently work. > >>> > >>>Signed-off-by: Dr. 
David Alan Gilbert <dgilbert@redhat.com> > >>>--- > >>> migration/rdma.c | 31 ++++++++++++++++++++++++------- > >>> 1 file changed, 24 insertions(+), 7 deletions(-) > >>> > >>>diff --git a/migration/rdma.c b/migration/rdma.c > >>>index 9532461..cb66721 100644 > >>>--- a/migration/rdma.c > >>>+++ b/migration/rdma.c > >>>@@ -411,7 +411,7 @@ static void network_to_control(RDMAControlHeader *control) > >>> */ > >>> typedef struct QEMU_PACKED { > >>> union QEMU_PACKED { > >>>- uint64_t current_addr; /* offset into the ramblock of the chunk */ > >>>+ uint64_t current_addr; /* offset into the ram_addr_t space */ > >>> uint64_t chunk; /* chunk to lookup if unregistering */ > >>> } key; > >>> uint32_t current_index; /* which ramblock the chunk belongs to */ > >>>@@ -419,8 +419,19 @@ typedef struct QEMU_PACKED { > >>> uint64_t chunks; /* how many sequential chunks to register */ > >>> } RDMARegister; > >>> > >>>-static void register_to_network(RDMARegister *reg) > >>>+static void register_to_network(RDMAContext *rdma, RDMARegister *reg) > >>> { > >>>+ RDMALocalBlock *local_block; > >>>+ local_block = &rdma->local_ram_blocks.block[reg->current_index]; > >>>+ > >>>+ if (local_block->is_ram_block) { > >>>+ /* > >>>+ * current_addr as passed in is an address in the local ram_addr_t > >>>+ * space, we need to translate this for the destination > >>>+ */ > >>>+ reg->key.current_addr -= local_block->offset; > >>>+ reg->key.current_addr += rdma->dest_blocks[reg->current_index].offset; > >>>+ } > >>> reg->key.current_addr = htonll(reg->key.current_addr); > >>> reg->current_index = htonl(reg->current_index); > >>> reg->chunks = htonll(reg->chunks); > >>>@@ -436,13 +447,19 @@ static void network_to_register(RDMARegister *reg) > >>> typedef struct QEMU_PACKED { > >>> uint32_t value; /* if zero, we will madvise() */ > >>> uint32_t block_idx; /* which ram block index */ > >>>- uint64_t offset; /* where in the remote ramblock this chunk */ > >>>+ uint64_t offset; /* Address in remote 
ram_addr_t space */ > >>> uint64_t length; /* length of the chunk */ > >>> } RDMACompress; > >>> > >>>-static void compress_to_network(RDMACompress *comp) > >>>+static void compress_to_network(RDMAContext *rdma, RDMACompress *comp) > >>> { > >>> comp->value = htonl(comp->value); > >>>+ /* > >>>+ * comp->offset as passed in is an address in the local ram_addr_t > >>>+ * space, we need to translate this for the destination > >>>+ */ > >>>+ comp->offset -= rdma->local_ram_blocks.block[comp->block_idx].offset; > >>>+ comp->offset += rdma->dest_blocks[comp->block_idx].offset; > >>> comp->block_idx = htonl(comp->block_idx); > >>> comp->offset = htonll(comp->offset); > >>> comp->length = htonll(comp->length); > >>So, why add the destination block's offset on the source side > >>just for it to be re-adjusted again when it gets to the destination side? > >> > >>Can you just stop at this: > >> > >>+ reg->key.current_addr -= local_block->offset; > >> > >>Without this: > >> > >>+ reg->key.current_addr += > >>rdma->dest_blocks[reg->current_index].offset; > >> > >>... on the source, followed by this on the destionation: > >> > >>+ comp->offset -= rdma->local_ram_blocks.block[comp->block_idx].offset; > >> > >>Without this: > >> > >>+ comp->offset += rdma->dest_blocks[comp->block_idx].offset; > >> > >>Did I follow correctly? > >Aren't both of those conversions happening on the source? > >Anyway, I think what you're saying is that we change the value sent over > >the network to be an offset within the block instead of an offset in > >the whole ram_addr_t space (i.e. that's what happens if you don't > >add back on the dest_blocks[].offset). > > Yes, right. Can you skip adding/subtracting the local block offset on each > side? I don't understand how I can do that without changing the wire format so that it would be subtly incompatible, and I'd like to get 2.1ish migrating to 2.3ish. 
If I didn't add the local_block->offset on the source, the value on the wire would now be the offset within the RAMBlock rather than the offset in ram_addr_t. Except for compatibility I'd agree it would be simpler. Dave > > - Michael > -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On 06/12/2015 01:50 PM, Dr. David Alan Gilbert wrote: > * Michael R. Hines (mrhines@linux.vnet.ibm.com) wrote: >> On 06/11/2015 01:58 PM, Dr. David Alan Gilbert wrote: >>> * Michael R. Hines (mrhines@linux.vnet.ibm.com) wrote: >>>> On 06/11/2015 12:17 PM, Dr. David Alan Gilbert (git) wrote: >>>>> From: "Dr. David Alan Gilbert" <dgilbert@redhat.com> >>>>> >>>>> The 'offset' field in RDMACompress and 'current_addr' field >>>>> in RDMARegister are commented as being offsets within a particular >>>>> RAMBlock, however they appear to actually be offsets within the >>>>> ram_addr_t space. >>>>> >>>>> The code currently assumes that the offsets on the source/destination >>>>> match, this change removes the need for the assumption for these >>>>> structures by translating the addresses into the ram_addr_t space of >>>>> the destination host. >>>>> >>>>> Note: An alternative would be to change the fields to actually >>>>> take the data they're commented for; this would potentially be >>>>> simpler but would break stream compatibility for those cases >>>>> that currently work. >>>>> >>>>> Signed-off-by: Dr. 
David Alan Gilbert <dgilbert@redhat.com> >>>>> --- >>>>> migration/rdma.c | 31 ++++++++++++++++++++++++------- >>>>> 1 file changed, 24 insertions(+), 7 deletions(-) >>>>> >>>>> diff --git a/migration/rdma.c b/migration/rdma.c >>>>> index 9532461..cb66721 100644 >>>>> --- a/migration/rdma.c >>>>> +++ b/migration/rdma.c >>>>> @@ -411,7 +411,7 @@ static void network_to_control(RDMAControlHeader *control) >>>>> */ >>>>> typedef struct QEMU_PACKED { >>>>> union QEMU_PACKED { >>>>> - uint64_t current_addr; /* offset into the ramblock of the chunk */ >>>>> + uint64_t current_addr; /* offset into the ram_addr_t space */ >>>>> uint64_t chunk; /* chunk to lookup if unregistering */ >>>>> } key; >>>>> uint32_t current_index; /* which ramblock the chunk belongs to */ >>>>> @@ -419,8 +419,19 @@ typedef struct QEMU_PACKED { >>>>> uint64_t chunks; /* how many sequential chunks to register */ >>>>> } RDMARegister; >>>>> >>>>> -static void register_to_network(RDMARegister *reg) >>>>> +static void register_to_network(RDMAContext *rdma, RDMARegister *reg) >>>>> { >>>>> + RDMALocalBlock *local_block; >>>>> + local_block = &rdma->local_ram_blocks.block[reg->current_index]; >>>>> + >>>>> + if (local_block->is_ram_block) { >>>>> + /* >>>>> + * current_addr as passed in is an address in the local ram_addr_t >>>>> + * space, we need to translate this for the destination >>>>> + */ >>>>> + reg->key.current_addr -= local_block->offset; >>>>> + reg->key.current_addr += rdma->dest_blocks[reg->current_index].offset; >>>>> + } >>>>> reg->key.current_addr = htonll(reg->key.current_addr); >>>>> reg->current_index = htonl(reg->current_index); >>>>> reg->chunks = htonll(reg->chunks); >>>>> @@ -436,13 +447,19 @@ static void network_to_register(RDMARegister *reg) >>>>> typedef struct QEMU_PACKED { >>>>> uint32_t value; /* if zero, we will madvise() */ >>>>> uint32_t block_idx; /* which ram block index */ >>>>> - uint64_t offset; /* where in the remote ramblock this chunk */ >>>>> + uint64_t offset; 
/* Address in remote ram_addr_t space */ >>>>> uint64_t length; /* length of the chunk */ >>>>> } RDMACompress; >>>>> >>>>> -static void compress_to_network(RDMACompress *comp) >>>>> +static void compress_to_network(RDMAContext *rdma, RDMACompress *comp) >>>>> { >>>>> comp->value = htonl(comp->value); >>>>> + /* >>>>> + * comp->offset as passed in is an address in the local ram_addr_t >>>>> + * space, we need to translate this for the destination >>>>> + */ >>>>> + comp->offset -= rdma->local_ram_blocks.block[comp->block_idx].offset; >>>>> + comp->offset += rdma->dest_blocks[comp->block_idx].offset; >>>>> comp->block_idx = htonl(comp->block_idx); >>>>> comp->offset = htonll(comp->offset); >>>>> comp->length = htonll(comp->length); >>>> So, why add the destination block's offset on the source side >>>> just for it to be re-adjusted again when it gets to the destination side? >>>> >>>> Can you just stop at this: >>>> >>>> + reg->key.current_addr -= local_block->offset; >>>> >>>> Without this: >>>> >>>> + reg->key.current_addr += >>>> rdma->dest_blocks[reg->current_index].offset; >>>> >>>> ... on the source, followed by this on the destionation: >>>> >>>> + comp->offset -= rdma->local_ram_blocks.block[comp->block_idx].offset; >>>> >>>> Without this: >>>> >>>> + comp->offset += rdma->dest_blocks[comp->block_idx].offset; >>>> >>>> Did I follow correctly? >>> Aren't both of those conversions happening on the source? >>> Anyway, I think what you're saying is that we change the value sent over >>> the network to be an offset within the block instead of an offset in >>> the whole ram_addr_t space (i.e. that's what happens if you don't >>> add back on the dest_blocks[].offset). >> Yes, right. Can you skip adding/subtracting the local block offset on each >> side? > I don't understand how I can do that without changing the wire format so > that it would be subtly incompatible, and I'd like to get 2.1ish migrating to 2.3ish. 
> If I didn't add the local_block->offset on the source, the value on the wire > would now be the offset within the RAMBlock rather than the offset in ram_addr_t. > > Except for compatibility I'd agree it would be simpler. > > Dave > > >> - Michael >> > -- > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK > Acknowledged.
"Dr. David Alan Gilbert (git)" <dgilbert@redhat.com> wrote: > From: "Dr. David Alan Gilbert" <dgilbert@redhat.com> > > The 'offset' field in RDMACompress and 'current_addr' field > in RDMARegister are commented as being offsets within a particular > RAMBlock, however they appear to actually be offsets within the > ram_addr_t space. > > The code currently assumes that the offsets on the source/destination > match, this change removes the need for the assumption for these > structures by translating the addresses into the ram_addr_t space of > the destination host. > > Note: An alternative would be to change the fields to actually > take the data they're commented for; this would potentially be > simpler but would break stream compatibility for those cases > that currently work. > > Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> Reviewed-by: Juan Quintela <quintela@redhat.com> backwards compatibility, you will live with your errors forever.... > + /* > + * current_addr as passed in is an address in the local ram_addr_t > + * space, we need to translate this for the destination > + */ > + reg->key.current_addr -= local_block->offset; > + reg->key.current_addr += rdma->dest_blocks[reg->current_index].offset; I would add a function that is: rdma_adjust_offset() or something, but it needs three pointer parameters, not sure that it is any easier :-(
diff --git a/migration/rdma.c b/migration/rdma.c index 9532461..cb66721 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -411,7 +411,7 @@ static void network_to_control(RDMAControlHeader *control) */ typedef struct QEMU_PACKED { union QEMU_PACKED { - uint64_t current_addr; /* offset into the ramblock of the chunk */ + uint64_t current_addr; /* offset into the ram_addr_t space */ uint64_t chunk; /* chunk to lookup if unregistering */ } key; uint32_t current_index; /* which ramblock the chunk belongs to */ @@ -419,8 +419,19 @@ typedef struct QEMU_PACKED { uint64_t chunks; /* how many sequential chunks to register */ } RDMARegister; -static void register_to_network(RDMARegister *reg) +static void register_to_network(RDMAContext *rdma, RDMARegister *reg) { + RDMALocalBlock *local_block; + local_block = &rdma->local_ram_blocks.block[reg->current_index]; + + if (local_block->is_ram_block) { + /* + * current_addr as passed in is an address in the local ram_addr_t + * space, we need to translate this for the destination + */ + reg->key.current_addr -= local_block->offset; + reg->key.current_addr += rdma->dest_blocks[reg->current_index].offset; + } reg->key.current_addr = htonll(reg->key.current_addr); reg->current_index = htonl(reg->current_index); reg->chunks = htonll(reg->chunks); @@ -436,13 +447,19 @@ static void network_to_register(RDMARegister *reg) typedef struct QEMU_PACKED { uint32_t value; /* if zero, we will madvise() */ uint32_t block_idx; /* which ram block index */ - uint64_t offset; /* where in the remote ramblock this chunk */ + uint64_t offset; /* Address in remote ram_addr_t space */ uint64_t length; /* length of the chunk */ } RDMACompress; -static void compress_to_network(RDMACompress *comp) +static void compress_to_network(RDMAContext *rdma, RDMACompress *comp) { comp->value = htonl(comp->value); + /* + * comp->offset as passed in is an address in the local ram_addr_t + * space, we need to translate this for the destination + */ + comp->offset -= 
rdma->local_ram_blocks.block[comp->block_idx].offset; + comp->offset += rdma->dest_blocks[comp->block_idx].offset; comp->block_idx = htonl(comp->block_idx); comp->offset = htonll(comp->offset); comp->length = htonll(comp->length); @@ -1288,7 +1305,7 @@ static int qemu_rdma_unregister_waiting(RDMAContext *rdma) rdma->total_registrations--; reg.key.chunk = chunk; - register_to_network(&reg); + register_to_network(rdma, &reg); ret = qemu_rdma_exchange_send(rdma, &head, (uint8_t *) &reg, &resp, NULL, NULL); if (ret < 0) { @@ -1909,7 +1926,7 @@ retry: trace_qemu_rdma_write_one_zero(chunk, sge.length, current_index, current_addr); - compress_to_network(&comp); + compress_to_network(rdma, &comp); ret = qemu_rdma_exchange_send(rdma, &head, (uint8_t *) &comp, NULL, NULL, NULL); @@ -1936,7 +1953,7 @@ retry: trace_qemu_rdma_write_one_sendreg(chunk, sge.length, current_index, current_addr); - register_to_network(&reg); + register_to_network(rdma, &reg); ret = qemu_rdma_exchange_send(rdma, &head, (uint8_t *) &reg, &resp, &reg_result_idx, NULL); if (ret < 0) {