diff mbox series

[39/52] migration/rdma: Convert qemu_rdma_write_one() to Error

Message ID 20230918144206.560120-40-armbru@redhat.com
State New
Headers show
Series migration/rdma: Error handling fixes | expand

Commit Message

Markus Armbruster Sept. 18, 2023, 2:41 p.m. UTC
Functions that use an Error **errp parameter to return errors should
not also report them to the user, because reporting is the caller's
job.  When the caller does, the error is reported twice.  When it
doesn't (because it recovered from the error), there is no error to
report, i.e. the report is bogus.

qemu_rdma_write_flush() violates this principle: it calls
error_report() via qemu_rdma_write_one().  I elected not to
investigate how callers handle the error, i.e. precise impact is not
known.

Clean this up by converting qemu_rdma_write_one() to Error.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
---
 migration/rdma.c | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

Comments

Li Zhijian Sept. 26, 2023, 5:24 a.m. UTC | #1
On 18/09/2023 22:41, Markus Armbruster wrote:
> Functions that use an Error **errp parameter to return errors should
> not also report them to the user, because reporting is the caller's
> job.  When the caller does, the error is reported twice.  When it
> doesn't (because it recovered from the error), there is no error to
> report, i.e. the report is bogus.
> 
> qemu_rdma_write_flush() violates this principle: it calls
> error_report() via qemu_rdma_write_one().  I elected not to
> investigate how callers handle the error, i.e. precise impact is not
> known.
> 
> Clean this up by converting qemu_rdma_write_one() to Error.
> 
> Signed-off-by: Markus Armbruster<armbru@redhat.com>


Reviewed-by: Li Zhijian <lizhijian@fujitsu.com>
Li Zhijian Sept. 26, 2023, 5:50 a.m. UTC | #2
On 18/09/2023 22:41, Markus Armbruster wrote:
> Functions that use an Error **errp parameter to return errors should
> not also report them to the user, because reporting is the caller's
> job.  When the caller does, the error is reported twice.  When it
> doesn't (because it recovered from the error), there is no error to
> report, i.e. the report is bogus.
> 
> qemu_rdma_write_flush() violates this principle: it calls
> error_report() via qemu_rdma_write_one().  I elected not to
> investigate how callers handle the error, i.e. precise impact is not
> known.
> 
> Clean this up by converting qemu_rdma_write_one() to Error.
> 
> Signed-off-by: Markus Armbruster<armbru@redhat.com>
> ---
>   migration/rdma.c | 25 +++++++++++--------------
>   1 file changed, 11 insertions(+), 14 deletions(-)
> 
> diff --git a/migration/rdma.c b/migration/rdma.c
> index c3c33fe242..9b8cbadfcd 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -2019,9 +2019,8 @@ static int qemu_rdma_exchange_recv(RDMAContext *rdma, RDMAControlHeader *head,
>    */
>   static int qemu_rdma_write_one(QEMUFile *f, RDMAContext *rdma,
>                                  int current_index, uint64_t current_addr,
> -                               uint64_t length)
> +                               uint64_t length, Error **errp)
>   {
> -    Error *err = NULL;
>       struct ibv_sge sge;
>       struct ibv_send_wr send_wr = { 0 };
>       struct ibv_send_wr *bad_wr;

[...]

>           }
> @@ -2219,7 +2216,7 @@ retry:
>           goto retry;
>   
>       } else if (ret > 0) {
> -        perror("rdma migration: post rdma write failed");
> +        error_setg(errp, "rdma migration: post rdma write failed");

It reminds that do you miss to use error_setg_errno() instead.





>           return -1;
>       }
Li Zhijian Sept. 26, 2023, 5:55 a.m. UTC | #3
On 26/09/2023 13:50, Li Zhijian wrote:
> 
> 
> On 18/09/2023 22:41, Markus Armbruster wrote:
>> Functions that use an Error **errp parameter to return errors should
>> not also report them to the user, because reporting is the caller's
>> job.  When the caller does, the error is reported twice.  When it
>> doesn't (because it recovered from the error), there is no error to
>> report, i.e. the report is bogus.
>>
>> qemu_rdma_write_flush() violates this principle: it calls
>> error_report() via qemu_rdma_write_one().  I elected not to
>> investigate how callers handle the error, i.e. precise impact is not
>> known.
>>
>> Clean this up by converting qemu_rdma_write_one() to Error.
>>
>> Signed-off-by: Markus Armbruster<armbru@redhat.com>
>> ---
>>   migration/rdma.c | 25 +++++++++++--------------
>>   1 file changed, 11 insertions(+), 14 deletions(-)
>>
>> diff --git a/migration/rdma.c b/migration/rdma.c
>> index c3c33fe242..9b8cbadfcd 100644
>> --- a/migration/rdma.c
>> +++ b/migration/rdma.c
>> @@ -2019,9 +2019,8 @@ static int qemu_rdma_exchange_recv(RDMAContext *rdma, RDMAControlHeader *head,
>>    */
>>   static int qemu_rdma_write_one(QEMUFile *f, RDMAContext *rdma,
>>                                  int current_index, uint64_t current_addr,
>> -                               uint64_t length)
>> +                               uint64_t length, Error **errp)
>>   {
>> -    Error *err = NULL;
>>       struct ibv_sge sge;
>>       struct ibv_send_wr send_wr = { 0 };
>>       struct ibv_send_wr *bad_wr;
> 
> [...]
> 
>>           }
>> @@ -2219,7 +2216,7 @@ retry:
>>           goto retry;
>>       } else if (ret > 0) {
>> -        perror("rdma migration: post rdma write failed");
>> +        error_setg(errp, "rdma migration: post rdma write failed");
> 
> It reminds that do you miss to use error_setg_errno() instead.
> 

Answer it myself:
ibv_post_send(3) says:

RETURN VALUE
        ibv_post_send() returns 0 on success, or the value of errno on failure (which indicates the failure reason).


the global error is not defined here.



> 
> 
> 
> 
>>           return -1;
>>       }
Markus Armbruster Sept. 26, 2023, 6:40 a.m. UTC | #4
"Zhijian Li (Fujitsu)" <lizhijian@fujitsu.com> writes:

> On 18/09/2023 22:41, Markus Armbruster wrote:
>> Functions that use an Error **errp parameter to return errors should
>> not also report them to the user, because reporting is the caller's
>> job.  When the caller does, the error is reported twice.  When it
>> doesn't (because it recovered from the error), there is no error to
>> report, i.e. the report is bogus.
>> 
>> qemu_rdma_write_flush() violates this principle: it calls
>> error_report() via qemu_rdma_write_one().  I elected not to
>> investigate how callers handle the error, i.e. precise impact is not
>> known.
>> 
>> Clean this up by converting qemu_rdma_write_one() to Error.
>> 
>> Signed-off-by: Markus Armbruster<armbru@redhat.com>
>> ---
>>   migration/rdma.c | 25 +++++++++++--------------
>>   1 file changed, 11 insertions(+), 14 deletions(-)
>> 
>> diff --git a/migration/rdma.c b/migration/rdma.c
>> index c3c33fe242..9b8cbadfcd 100644
>> --- a/migration/rdma.c
>> +++ b/migration/rdma.c
>> @@ -2019,9 +2019,8 @@ static int qemu_rdma_exchange_recv(RDMAContext *rdma, RDMAControlHeader *head,
>>    */
>>   static int qemu_rdma_write_one(QEMUFile *f, RDMAContext *rdma,
>>                                  int current_index, uint64_t current_addr,
>> -                               uint64_t length)
>> +                               uint64_t length, Error **errp)
>>   {
>> -    Error *err = NULL;
>>       struct ibv_sge sge;
>>       struct ibv_send_wr send_wr = { 0 };
>>       struct ibv_send_wr *bad_wr;
>
> [...]
>
>>           }
>> @@ -2219,7 +2216,7 @@ retry:
>>           goto retry;
>>   
>>       } else if (ret > 0) {
>> -        perror("rdma migration: post rdma write failed");
>> +        error_setg(errp, "rdma migration: post rdma write failed");
>
> It reminds that do you miss to use error_setg_errno() instead.

Indeed!

I'll adjust the commit message as well.

>>           return -1;
>>       }
Markus Armbruster Sept. 26, 2023, 9:26 a.m. UTC | #5
"Zhijian Li (Fujitsu)" <lizhijian@fujitsu.com> writes:

> On 26/09/2023 13:50, Li Zhijian wrote:
>> 
>> 
>> On 18/09/2023 22:41, Markus Armbruster wrote:
>>> Functions that use an Error **errp parameter to return errors should
>>> not also report them to the user, because reporting is the caller's
>>> job.  When the caller does, the error is reported twice.  When it
>>> doesn't (because it recovered from the error), there is no error to
>>> report, i.e. the report is bogus.
>>>
>>> qemu_rdma_write_flush() violates this principle: it calls
>>> error_report() via qemu_rdma_write_one().  I elected not to
>>> investigate how callers handle the error, i.e. precise impact is not
>>> known.
>>>
>>> Clean this up by converting qemu_rdma_write_one() to Error.
>>>
>>> Signed-off-by: Markus Armbruster<armbru@redhat.com>
>>> ---
>>>   migration/rdma.c | 25 +++++++++++--------------
>>>   1 file changed, 11 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/migration/rdma.c b/migration/rdma.c
>>> index c3c33fe242..9b8cbadfcd 100644
>>> --- a/migration/rdma.c
>>> +++ b/migration/rdma.c
>>> @@ -2019,9 +2019,8 @@ static int qemu_rdma_exchange_recv(RDMAContext *rdma, RDMAControlHeader *head,
>>>    */
>>>   static int qemu_rdma_write_one(QEMUFile *f, RDMAContext *rdma,
>>>                                  int current_index, uint64_t current_addr,
>>> -                               uint64_t length)
>>> +                               uint64_t length, Error **errp)
>>>   {
>>> -    Error *err = NULL;
>>>       struct ibv_sge sge;
>>>       struct ibv_send_wr send_wr = { 0 };
>>>       struct ibv_send_wr *bad_wr;
>> 
>> [...]
>> 
>>>           }
>>> @@ -2219,7 +2216,7 @@ retry:
>>>           goto retry;
>>>       } else if (ret > 0) {
>>> -        perror("rdma migration: post rdma write failed");
>>> +        error_setg(errp, "rdma migration: post rdma write failed");
>> 
>> It reminds that do you miss to use error_setg_errno() instead.
>> 
>
> Answer it myself:
> ibv_post_send(3) says:
>
> RETURN VALUE
>         ibv_post_send() returns 0 on success, or the value of errno on failure (which indicates the failure reason).

I read this as "assign error code to errno and return it."  But...

> the global error is not defined here.

... your assertion made me check the source code, and it looks like it
does *not* assign to errno, at least not reliably.  Which means perror()
prints garbage.

I'll delete the perror() in a separate patch.

>>>           return -1;
>>>       }
Markus Armbruster Sept. 27, 2023, 11:46 a.m. UTC | #6
migration/rdma.c uses errno directly or via perror() after the following
functions:

* poll()

  POSIX specifies errno is set on error.  Good.

* rdma_get_cm_event(), rdma_connect(), rdma_get_cm_event()

  Manual page promises "if an error occurs, errno will be set".  Good.

* ibv_open_device()

  Manual page does not mention errno.  Using it seems ill-advised.

  qemu_rdma_broken_ipv6_kernel() recovers from EPERM by trying the next
  device.  Wrong if ibv_open_device() doesn't actually set errno.

  What is to be done here?

* ibv_reg_mr()

  Manual page does not mention errno.  Using it seems ill-advised.

  qemu_rdma_reg_whole_ram_blocks() and qemu_rdma_register_and_get_keys()
  recover from errno = ENOTSUP by retrying with modified @access
  argument.  Wrong if ibv_reg_mr() doesn't actually set errno.

  What is to be done here?

* ibv_get_cq_event()

  Manual page does not mention errno.  Using it seems ill-advised.

  qemu_rdma_block_for_wrid() calls perror().  Removed in PATCH 48.  Good
  enough.

* ibv_post_send()

  Manual page has the function return "the value of errno on failure".
  Sounds like it sets errno to the value it returns.  However, the
  rdma-core repository defines it as

    static inline int ibv_post_send(struct ibv_qp *qp, struct ibv_send_wr *wr,
                                    struct ibv_send_wr **bad_wr)
    {
            return qp->context->ops.post_send(qp, wr, bad_wr);
    }

  and at least one of the methods fails without setting errno:

    static int mana_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
                              struct ibv_send_wr **bad)
    {
            /* This version of driver supports RAW QP only.
             * Posting WR is done directly in the application.
             */
            return EOPNOTSUPP;
    }

  qemu_rdma_write_one() calls perror().  PATCH 39 (this one) replaces it
  by error_setg(), not error_setg_errno().  Seems prudent, but should be
  called out in the commit message.

* ibv_advise_mr()

  Manual page has the function return "the value of errno on failure".
  Sounds like it sets errno to the value it returns, but my findings for
  ibv_post_send() make me doubt it.

  qemu_rdma_advise_prefetch_mr() traces strerror(errno).  Could be
  misleading.  Drop that part?

* ibv_dereg_mr()

  Manual page has the function return "the value of errno on failure".
  Sounds like it sets errno to the value it returns, but my findings for
  ibv_post_send() make me doubt it.

  qemu_rdma_unregister_waiting() calls perror().  Removed in PATCH 51.
  Good enough.

* qemu_get_cm_event_timeout()

  Can fail without setting errno.

  qemu_rdma_connect() calls perror().  Removed in PATCH 45.  Good
  enough.

Thoughts?


[...]

[*] https://github.com/linux-rdma/rdma-core.git
    commit 55fa316b4b18f258d8ac1ceb4aa5a7a35b094dcf
Markus Armbruster Sept. 28, 2023, 6:49 a.m. UTC | #7
Let me try to map solutions.

Markus Armbruster <armbru@redhat.com> writes:

> migration/rdma.c uses errno directly or via perror() after the following
> functions:
>
> * poll()
>
>   POSIX specifies errno is set on error.  Good.

Nothing wrong, nothing to do.

> * rdma_get_cm_event(), rdma_connect(), rdma_get_cm_event()
>
>   Manual page promises "if an error occurs, errno will be set".  Good.

Nothing wrong, nothing to do.

> * ibv_open_device()
>
>   Manual page does not mention errno.  Using it seems ill-advised.
>
>   qemu_rdma_broken_ipv6_kernel() recovers from EPERM by trying the next
>   device.  Wrong if ibv_open_device() doesn't actually set errno.
>
>   What is to be done here?

1. Investigate whether ibv_open_device() sets errno.  I can't spare time
   for that.

2. Add a comment pointing out the problem, in the hope somebody
   investigates later.

3. Do nothing.

> * ibv_reg_mr()
>
>   Manual page does not mention errno.  Using it seems ill-advised.
>
>   qemu_rdma_reg_whole_ram_blocks() and qemu_rdma_register_and_get_keys()
>   recover from errno = ENOTSUP by retrying with modified @access
>   argument.  Wrong if ibv_reg_mr() doesn't actually set errno.
>
>   What is to be done here?

Likewise.

> * ibv_get_cq_event()
>
>   Manual page does not mention errno.  Using it seems ill-advised.
>
>   qemu_rdma_block_for_wrid() calls perror().  Removed in PATCH 48.  Good
>   enough.

1. Add a comment pointing out the problem, remove it in PATCH 48.

2. Nothing wrong after the series, nothing to do.

> * ibv_post_send()
>
>   Manual page has the function return "the value of errno on failure".
>   Sounds like it sets errno to the value it returns.  However, the
>   rdma-core repository defines it as
>
>     static inline int ibv_post_send(struct ibv_qp *qp, struct ibv_send_wr *wr,
>                                     struct ibv_send_wr **bad_wr)
>     {
>             return qp->context->ops.post_send(qp, wr, bad_wr);
>     }
>
>   and at least one of the methods fails without setting errno:
>
>     static int mana_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
>                               struct ibv_send_wr **bad)
>     {
>             /* This version of driver supports RAW QP only.
>              * Posting WR is done directly in the application.
>              */
>             return EOPNOTSUPP;
>     }
>
>   qemu_rdma_write_one() calls perror().  PATCH 39 (this one) replaces it
>   by error_setg(), not error_setg_errno().  Seems prudent, but should be
>   called out in the commit message.

1. Add a comment pointing out the problem, remove it in PATCH 39.

2. Pass @ret, not @errno to error_setg_errno().

3. Nothing wrong after the series, nothing to do.

Since 2. is easy, let's do it.

> * ibv_advise_mr()
>
>   Manual page has the function return "the value of errno on failure".
>   Sounds like it sets errno to the value it returns, but my findings for
>   ibv_post_send() make me doubt it.
>
>   qemu_rdma_advise_prefetch_mr() traces strerror(errno).  Could be
>   misleading.  Drop that part?

1. Change sterror(errno) to strerror(ret)

2. Drop strerror(errno)

3. Do nothing.

Since 1. is easy, let's do it.

> * ibv_dereg_mr()
>
>   Manual page has the function return "the value of errno on failure".
>   Sounds like it sets errno to the value it returns, but my findings for
>   ibv_post_send() make me doubt it.
>
>   qemu_rdma_unregister_waiting() calls perror().  Removed in PATCH 51.
>   Good enough.

1. Add a comment pointing out the problem, remove it in PATCH 51.

2. Nothing wrong after the series, nothing to do.

> * qemu_get_cm_event_timeout()
>
>   Can fail without setting errno.
>
>   qemu_rdma_connect() calls perror().  Removed in PATCH 45.  Good
>   enough.
>
> Thoughts?

Considering all of the above...  I'd like to stick a patch documenting
problematic errno use early in the series, and fix all the easy ones
later in the series, leaving just the two difficult ones in
qemu_rdma_broken_ipv6_kernel() and qemu_rdma_reg_whole_ram_blocks().

Makes sense?

> [...]
>
> [*] https://github.com/linux-rdma/rdma-core.git
>     commit 55fa316b4b18f258d8ac1ceb4aa5a7a35b094dcf
Li Zhijian Oct. 7, 2023, 3:40 a.m. UTC | #8
+rdma-core


Is global variable *errno* reliable when the documentation only states
"returns 0 on success, or the value of errno on failure (which indicates the failure reason)."

Someone read it as "assign error code to errno and return it.", I used to think the same way.
but ibv_post_send() doesn't always follow this rule. see ibv_post_send() -> mana_post_send()

Actually, QEMU are using errno after calling libibverbs APIs, so we hope the man page can be
more clear. like posix does:

RETURN VALUE
        Upon successful completion fopen(), fdopen() and freopen() return a FILE pointer.  Otherwise, NULL is returned and errno is set to indicate the error

Thanks
Zhijian


On 27/09/2023 19:46, Markus Armbruster wrote:
> migration/rdma.c uses errno directly or via perror() after the following
> functions:
> 
> * poll()
> 
>    POSIX specifies errno is set on error.  Good.
> 
> * rdma_get_cm_event(), rdma_connect(), rdma_get_cm_event()
> 
>    Manual page promises "if an error occurs, errno will be set".  Good.
> 
> * ibv_open_device()
> 
>    Manual page does not mention errno.  Using it seems ill-advised.
> 
>    qemu_rdma_broken_ipv6_kernel() recovers from EPERM by trying the next
>    device.  Wrong if ibv_open_device() doesn't actually set errno.
> 
>    What is to be done here?
> 
> * ibv_reg_mr()
> 
>    Manual page does not mention errno.  Using it seems ill-advised.
> 
>    qemu_rdma_reg_whole_ram_blocks() and qemu_rdma_register_and_get_keys()
>    recover from errno = ENOTSUP by retrying with modified @access
>    argument.  Wrong if ibv_reg_mr() doesn't actually set errno.
> 
>    What is to be done here?
> 
> * ibv_get_cq_event()
> 
>    Manual page does not mention errno.  Using it seems ill-advised.
> 
>    qemu_rdma_block_for_wrid() calls perror().  Removed in PATCH 48.  Good
>    enough.
> 
> * ibv_post_send()
> 
>    Manual page has the function return "the value of errno on failure".
>    Sounds like it sets errno to the value it returns.  However, the
>    rdma-core repository defines it as
> 
>      static inline int ibv_post_send(struct ibv_qp *qp, struct ibv_send_wr *wr,
>                                      struct ibv_send_wr **bad_wr)
>      {
>              return qp->context->ops.post_send(qp, wr, bad_wr);
>      }
> 
>    and at least one of the methods fails without setting errno:
> 
>      static int mana_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
>                                struct ibv_send_wr **bad)
>      {
>              /* This version of driver supports RAW QP only.
>               * Posting WR is done directly in the application.
>               */
>              return EOPNOTSUPP;
>      }
> 
>    qemu_rdma_write_one() calls perror().  PATCH 39 (this one) replaces it
>    by error_setg(), not error_setg_errno().  Seems prudent, but should be
>    called out in the commit message.
> 
> * ibv_advise_mr()
> 
>    Manual page has the function return "the value of errno on failure".
>    Sounds like it sets errno to the value it returns, but my findings for
>    ibv_post_send() make me doubt it.
> 
>    qemu_rdma_advise_prefetch_mr() traces strerror(errno).  Could be
>    misleading.  Drop that part?
> 
> * ibv_dereg_mr()
> 
>    Manual page has the function return "the value of errno on failure".
>    Sounds like it sets errno to the value it returns, but my findings for
>    ibv_post_send() make me doubt it.
> 
>    qemu_rdma_unregister_waiting() calls perror().  Removed in PATCH 51.
>    Good enough.
> 
> * qemu_get_cm_event_timeout()
> 
>    Can fail without setting errno.
> 
>    qemu_rdma_connect() calls perror().  Removed in PATCH 45.  Good
>    enough.
> 
> Thoughts?
> 
> 
> [...]
> 
> [*] https://github.com/linux-rdma/rdma-core.git
>      commit 55fa316b4b18f258d8ac1ceb4aa5a7a35b094dcf
>
Jason Gunthorpe Oct. 16, 2023, 12:11 p.m. UTC | #9
On Sat, Oct 07, 2023 at 03:40:50AM +0000, Zhijian Li (Fujitsu) wrote:
> +rdma-core
> 
> 
> Is global variable *errno* reliable when the documentation only states
> "returns 0 on success, or the value of errno on failure (which indicates the failure reason)."

I think the intention of this language was that an errno constant is
returned, the caller should not assume it is stored in errno.

errno is difficult, many things overwrite it, you can loose its value
during error handling.

Jason
Li Zhijian Oct. 17, 2023, 5:22 a.m. UTC | #10
On 16/10/2023 20:11, Jason Gunthorpe wrote:
> On Sat, Oct 07, 2023 at 03:40:50AM +0000, Zhijian Li (Fujitsu) wrote:
>> +rdma-core
>>
>>
>> Is global variable *errno* reliable when the documentation only states
>> "returns 0 on success, or the value of errno on failure (which indicates the failure reason)."
> 
> I think the intention of this language was that an errno constant is
> returned, the caller should not assume it is stored in errno.
> 
Understood.
If that's the case, I think some pieces of code were misunderstood in QEMU before.


Thanks
Zhijian

> errno is difficult, many things overwrite it, you can loose its value
> during error handling.
> 
> Jason
diff mbox series

Patch

diff --git a/migration/rdma.c b/migration/rdma.c
index c3c33fe242..9b8cbadfcd 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -2019,9 +2019,8 @@  static int qemu_rdma_exchange_recv(RDMAContext *rdma, RDMAControlHeader *head,
  */
 static int qemu_rdma_write_one(QEMUFile *f, RDMAContext *rdma,
                                int current_index, uint64_t current_addr,
-                               uint64_t length)
+                               uint64_t length, Error **errp)
 {
-    Error *err = NULL;
     struct ibv_sge sge;
     struct ibv_send_wr send_wr = { 0 };
     struct ibv_send_wr *bad_wr;
@@ -2075,7 +2074,7 @@  retry:
         ret = qemu_rdma_block_for_wrid(rdma, RDMA_WRID_RDMA_WRITE, NULL);
 
         if (ret < 0) {
-            error_report("Failed to Wait for previous write to complete "
+            error_setg(errp, "Failed to Wait for previous write to complete "
                     "block %d chunk %" PRIu64
                     " current %" PRIu64 " len %" PRIu64 " %d",
                     current_index, chunk, sge.addr, length, rdma->nb_sent);
@@ -2107,10 +2106,9 @@  retry:
 
                 compress_to_network(rdma, &comp);
                 ret = qemu_rdma_exchange_send(rdma, &head,
-                                (uint8_t *) &comp, NULL, NULL, NULL, &err);
+                                (uint8_t *) &comp, NULL, NULL, NULL, errp);
 
                 if (ret < 0) {
-                    error_report_err(err);
                     return -1;
                 }
 
@@ -2136,9 +2134,8 @@  retry:
 
             register_to_network(rdma, &reg);
             ret = qemu_rdma_exchange_send(rdma, &head, (uint8_t *) &reg,
-                                    &resp, &reg_result_idx, NULL, &err);
+                                    &resp, &reg_result_idx, NULL, errp);
             if (ret < 0) {
-                error_report_err(err);
                 return -1;
             }
 
@@ -2146,7 +2143,7 @@  retry:
             if (qemu_rdma_register_and_get_keys(rdma, block, sge.addr,
                                                 &sge.lkey, NULL, chunk,
                                                 chunk_start, chunk_end)) {
-                error_report("cannot get lkey");
+                error_setg(errp, "cannot get lkey");
                 return -1;
             }
 
@@ -2165,7 +2162,7 @@  retry:
             if (qemu_rdma_register_and_get_keys(rdma, block, sge.addr,
                                                 &sge.lkey, NULL, chunk,
                                                 chunk_start, chunk_end)) {
-                error_report("cannot get lkey!");
+                error_setg(errp, "cannot get lkey!");
                 return -1;
             }
         }
@@ -2177,7 +2174,7 @@  retry:
         if (qemu_rdma_register_and_get_keys(rdma, block, sge.addr,
                                                      &sge.lkey, NULL, chunk,
                                                      chunk_start, chunk_end)) {
-            error_report("cannot get lkey!");
+            error_setg(errp, "cannot get lkey!");
             return -1;
         }
     }
@@ -2211,7 +2208,7 @@  retry:
         trace_qemu_rdma_write_one_queue_full();
         ret = qemu_rdma_block_for_wrid(rdma, RDMA_WRID_RDMA_WRITE, NULL);
         if (ret < 0) {
-            error_report("rdma migration: failed to make "
+            error_setg(errp, "rdma migration: failed to make "
                          "room in full send queue!");
             return -1;
         }
@@ -2219,7 +2216,7 @@  retry:
         goto retry;
 
     } else if (ret > 0) {
-        perror("rdma migration: post rdma write failed");
+        error_setg(errp, "rdma migration: post rdma write failed");
         return -1;
     }
 
@@ -2248,10 +2245,10 @@  static int qemu_rdma_write_flush(QEMUFile *f, RDMAContext *rdma,
     }
 
     ret = qemu_rdma_write_one(f, rdma,
-            rdma->current_index, rdma->current_addr, rdma->current_length);
+            rdma->current_index, rdma->current_addr, rdma->current_length,
+            errp);
 
     if (ret < 0) {
-        error_setg(errp, "FIXME temporary error message");
         return -1;
     }