diff mbox series

[4/4] spapr: Improve spapr_reallocate_hpt() error reporting

Message ID 160371605460.305923.5890143959901241157.stgit@bahia.lan
State New
Headers show
Series spapr: Error handling fixes and cleanups (round 5) | expand

Commit Message

Greg Kurz Oct. 26, 2020, 12:40 p.m. UTC
spapr_reallocate_hpt() has three users, two of which pass &error_fatal
and the third one, htab_load(), passes &local_err, uses it to detect
failures and simply propagates -EINVAL up to vmstate_load(), which will
cause QEMU to exit. It is thus confusing that spapr_reallocate_hpt()
doesn't return right away when an error is detected in some cases. Also,
the comment suggesting that the caller is welcome to try to carry on
seems like a remnant in this respect.

This can be improved:
- change spapr_reallocate_hpt() to always report a negative errno on
  failure, either as reported by KVM or -ENOSPC if the HPT is smaller
  than what was asked,
- use that to detect failures in htab_load() which is preferred over
  checking &local_err,
- propagate this negative errno to vmstate_load() because it is more
  accurate than propagating -EINVAL for all possible errors.

Signed-off-by: Greg Kurz <groug@kaod.org>
---
 hw/ppc/spapr.c         |   18 ++++++++++--------
 include/hw/ppc/spapr.h |    3 +--
 2 files changed, 11 insertions(+), 10 deletions(-)

Comments

Philippe Mathieu-Daudé Oct. 26, 2020, 1:49 p.m. UTC | #1
On 10/26/20 1:40 PM, Greg Kurz wrote:
> spapr_reallocate_hpt() has three users, two of which pass &error_fatal
> and the third one, htab_load(), passes &local_err, uses it to detect
> failures and simply propagates -EINVAL up to vmstate_load(), which will
> cause QEMU to exit. It is thus confusing that spapr_reallocate_hpt()
> doesn't return right away when an error is detected in some cases. Also,
> the comment suggesting that the caller is welcome to try to carry on
> seems like a remnant in this respect.
> 
> This can be improved:
> - change spapr_reallocate_hpt() to always report a negative errno on
>    failure, either as reported by KVM or -ENOSPC if the HPT is smaller
>    than what was asked,
> - use that to detect failures in htab_load() which is preferred over
>    checking &local_err,
> - propagate this negative errno to vmstate_load() because it is more
>    accurate than propagating -EINVAL for all possible errors.
> 
> Signed-off-by: Greg Kurz <groug@kaod.org>
...

> -void spapr_reallocate_hpt(SpaprMachineState *spapr, int shift,
> -                          Error **errp)
> +int spapr_reallocate_hpt(SpaprMachineState *spapr, int shift, Error **errp)
>   {
>       ERRP_GUARD();
>       long rc;
> @@ -1496,7 +1495,7 @@ void spapr_reallocate_hpt(SpaprMachineState *spapr, int shift,
>   
>       if (rc == -EOPNOTSUPP) {
>           error_setg(errp, "HPT not supported in nested guests");
> -        return;
> +        return -EOPNOTSUPP;
>       }
>   
>       if (rc < 0) {
> @@ -1504,8 +1503,7 @@ void spapr_reallocate_hpt(SpaprMachineState *spapr, int shift,
>           error_setg_errno(errp, errno, "Failed to allocate KVM HPT of order %d",
>                            shift);
>           error_append_hint(errp, "Try smaller maxmem?\n");
> -        /* This is almost certainly fatal, but if the caller really
> -         * wants to carry on with shift == 0, it's welcome to try */
> +        return -errno;

Maybe returning here should be in a previous patch.
Otherwise patch looks good.

>       } else if (rc > 0) {
>           /* kernel-side HPT allocated */
>           if (rc != shift) {
> @@ -1513,6 +1511,7 @@ void spapr_reallocate_hpt(SpaprMachineState *spapr, int shift,
>                          "Requested order %d HPT, but kernel allocated order %ld",
>                          shift, rc);
>               error_append_hint(errp, "Try smaller maxmem?\n");
> +            return -ENOSPC;
>           }
>   
>           spapr->htab_shift = shift;
> @@ -1533,6 +1532,7 @@ void spapr_reallocate_hpt(SpaprMachineState *spapr, int shift,
>       /* We're setting up a hash table, so that means we're not radix */
>       spapr->patb_entry = 0;
>       spapr_set_all_lpcrs(0, LPCR_HR | LPCR_UPRT);
> +    return 0;
>   }
>   
>   void spapr_setup_hpt(SpaprMachineState *spapr)
> @@ -2286,11 +2286,13 @@ static int htab_load(QEMUFile *f, void *opaque, int version_id)
>       }
>   
>       if (section_hdr) {
> +        int ret;
> +
>           /* First section gives the htab size */
> -        spapr_reallocate_hpt(spapr, section_hdr, &local_err);
> -        if (local_err) {
> +        ret = spapr_reallocate_hpt(spapr, section_hdr, &local_err);
> +        if (ret < 0) {
>               error_report_err(local_err);
> -            return -EINVAL;
> +            return ret;
>           }
>           return 0;
>       }
...
Greg Kurz Oct. 26, 2020, 2:47 p.m. UTC | #2
On Mon, 26 Oct 2020 14:49:34 +0100
Philippe Mathieu-Daudé <philmd@redhat.com> wrote:

> On 10/26/20 1:40 PM, Greg Kurz wrote:
> > spapr_reallocate_hpt() has three users, two of which pass &error_fatal
> > and the third one, htab_load(), passes &local_err, uses it to detect
> > failures and simply propagates -EINVAL up to vmstate_load(), which will
> > cause QEMU to exit. It is thus confusing that spapr_reallocate_hpt()
> > doesn't return right away when an error is detected in some cases. Also,
> > the comment suggesting that the caller is welcome to try to carry on
> > seems like a remnant in this respect.
> > 
> > This can be improved:
> > - change spapr_reallocate_hpt() to always report a negative errno on
> >    failure, either as reported by KVM or -ENOSPC if the HPT is smaller
> >    than what was asked,
> > - use that to detect failures in htab_load() which is preferred over
> >    checking &local_err,
> > - propagate this negative errno to vmstate_load() because it is more
> >    accurate than propagating -EINVAL for all possible errors.
> > 
> > Signed-off-by: Greg Kurz <groug@kaod.org>
> ...
> 
> > -void spapr_reallocate_hpt(SpaprMachineState *spapr, int shift,
> > -                          Error **errp)
> > +int spapr_reallocate_hpt(SpaprMachineState *spapr, int shift, Error **errp)
> >   {
> >       ERRP_GUARD();
> >       long rc;
> > @@ -1496,7 +1495,7 @@ void spapr_reallocate_hpt(SpaprMachineState *spapr, int shift,
> >   
> >       if (rc == -EOPNOTSUPP) {
> >           error_setg(errp, "HPT not supported in nested guests");
> > -        return;
> > +        return -EOPNOTSUPP;
> >       }
> >   
> >       if (rc < 0) {
> > @@ -1504,8 +1503,7 @@ void spapr_reallocate_hpt(SpaprMachineState *spapr, int shift,
> >           error_setg_errno(errp, errno, "Failed to allocate KVM HPT of order %d",
> >                            shift);
> >           error_append_hint(errp, "Try smaller maxmem?\n");
> > -        /* This is almost certainly fatal, but if the caller really
> > -         * wants to carry on with shift == 0, it's welcome to try */
> > +        return -errno;
> 
> Maybe returning here should be in a previous patch.
> Otherwise patch looks good.
> 

It could have been indeed...

> >       } else if (rc > 0) {
> >           /* kernel-side HPT allocated */
> >           if (rc != shift) {
> > @@ -1513,6 +1511,7 @@ void spapr_reallocate_hpt(SpaprMachineState *spapr, int shift,
> >                          "Requested order %d HPT, but kernel allocated order %ld",
> >                          shift, rc);
> >               error_append_hint(errp, "Try smaller maxmem?\n");
> > +            return -ENOSPC;

... along with this one.

I didn't go this way because it doesn't really affect the final behavior since
QEMU exits in all cases. It's mostly about propagating an appropriate errno up
to VMState in the case of htab_load(). But if you find it clearer and I need
to post a v2, I can certainly do that.

> >           }
> >   
> >           spapr->htab_shift = shift;
> > @@ -1533,6 +1532,7 @@ void spapr_reallocate_hpt(SpaprMachineState *spapr, int shift,
> >       /* We're setting up a hash table, so that means we're not radix */
> >       spapr->patb_entry = 0;
> >       spapr_set_all_lpcrs(0, LPCR_HR | LPCR_UPRT);
> > +    return 0;
> >   }
> >   
> >   void spapr_setup_hpt(SpaprMachineState *spapr)
> > @@ -2286,11 +2286,13 @@ static int htab_load(QEMUFile *f, void *opaque, int version_id)
> >       }
> >   
> >       if (section_hdr) {
> > +        int ret;
> > +
> >           /* First section gives the htab size */
> > -        spapr_reallocate_hpt(spapr, section_hdr, &local_err);
> > -        if (local_err) {
> > +        ret = spapr_reallocate_hpt(spapr, section_hdr, &local_err);
> > +        if (ret < 0) {
> >               error_report_err(local_err);
> > -            return -EINVAL;
> > +            return ret;
> >           }
> >           return 0;
> >       }
> ...
>
David Gibson Oct. 27, 2020, 2:03 a.m. UTC | #3
On Mon, Oct 26, 2020 at 01:40:54PM +0100, Greg Kurz wrote:
> spapr_reallocate_hpt() has three users, two of which pass &error_fatal
> and the third one, htab_load(), passes &local_err, uses it to detect
> failures and simply propagates -EINVAL up to vmstate_load(), which will
> cause QEMU to exit. It is thus confusing that spapr_reallocate_hpt()
> doesn't return right away when an error is detected in some cases. Also,
> the comment suggesting that the caller is welcome to try to carry on
> seems like a remnant in this respect.
> 
> This can be improved:
> - change spapr_reallocate_hpt() to always report a negative errno on
>   failure, either as reported by KVM or -ENOSPC if the HPT is smaller
>   than what was asked,
> - use that to detect failures in htab_load() which is preferred over
>   checking &local_err,
> - propagate this negative errno to vmstate_load() because it is more
>   accurate than propagating -EINVAL for all possible errors.
> 
> Signed-off-by: Greg Kurz <groug@kaod.org>

Applied to ppc-for-5.2, thanks.

> ---
>  hw/ppc/spapr.c         |   18 ++++++++++--------
>  include/hw/ppc/spapr.h |    3 +--
>  2 files changed, 11 insertions(+), 10 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index ff7de7da2875..12a012d9dd09 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -1483,8 +1483,7 @@ void spapr_free_hpt(SpaprMachineState *spapr)
>      close_htab_fd(spapr);
>  }
>  
> -void spapr_reallocate_hpt(SpaprMachineState *spapr, int shift,
> -                          Error **errp)
> +int spapr_reallocate_hpt(SpaprMachineState *spapr, int shift, Error **errp)
>  {
>      ERRP_GUARD();
>      long rc;
> @@ -1496,7 +1495,7 @@ void spapr_reallocate_hpt(SpaprMachineState *spapr, int shift,
>  
>      if (rc == -EOPNOTSUPP) {
>          error_setg(errp, "HPT not supported in nested guests");
> -        return;
> +        return -EOPNOTSUPP;
>      }
>  
>      if (rc < 0) {
> @@ -1504,8 +1503,7 @@ void spapr_reallocate_hpt(SpaprMachineState *spapr, int shift,
>          error_setg_errno(errp, errno, "Failed to allocate KVM HPT of order %d",
>                           shift);
>          error_append_hint(errp, "Try smaller maxmem?\n");
> -        /* This is almost certainly fatal, but if the caller really
> -         * wants to carry on with shift == 0, it's welcome to try */
> +        return -errno;
>      } else if (rc > 0) {
>          /* kernel-side HPT allocated */
>          if (rc != shift) {
> @@ -1513,6 +1511,7 @@ void spapr_reallocate_hpt(SpaprMachineState *spapr, int shift,
>                         "Requested order %d HPT, but kernel allocated order %ld",
>                         shift, rc);
>              error_append_hint(errp, "Try smaller maxmem?\n");
> +            return -ENOSPC;
>          }
>  
>          spapr->htab_shift = shift;
> @@ -1533,6 +1532,7 @@ void spapr_reallocate_hpt(SpaprMachineState *spapr, int shift,
>      /* We're setting up a hash table, so that means we're not radix */
>      spapr->patb_entry = 0;
>      spapr_set_all_lpcrs(0, LPCR_HR | LPCR_UPRT);
> +    return 0;
>  }
>  
>  void spapr_setup_hpt(SpaprMachineState *spapr)
> @@ -2286,11 +2286,13 @@ static int htab_load(QEMUFile *f, void *opaque, int version_id)
>      }
>  
>      if (section_hdr) {
> +        int ret;
> +
>          /* First section gives the htab size */
> -        spapr_reallocate_hpt(spapr, section_hdr, &local_err);
> -        if (local_err) {
> +        ret = spapr_reallocate_hpt(spapr, section_hdr, &local_err);
> +        if (ret < 0) {
>              error_report_err(local_err);
> -            return -EINVAL;
> +            return ret;
>          }
>          return 0;
>      }
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index bb47896f173b..2e89e36cfbdc 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -846,8 +846,7 @@ void spapr_hotplug_req_add_by_count_indexed(SpaprDrcType drc_type,
>  void spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type,
>                                                 uint32_t count, uint32_t index);
>  int spapr_hpt_shift_for_ramsize(uint64_t ramsize);
> -void spapr_reallocate_hpt(SpaprMachineState *spapr, int shift,
> -                          Error **errp);
> +int spapr_reallocate_hpt(SpaprMachineState *spapr, int shift, Error **errp);
>  void spapr_clear_pending_events(SpaprMachineState *spapr);
>  void spapr_clear_pending_hotplug_events(SpaprMachineState *spapr);
>  int spapr_max_server_number(SpaprMachineState *spapr);
> 
>
Philippe Mathieu-Daudé Oct. 27, 2020, 8:48 a.m. UTC | #4
On 10/26/20 3:47 PM, Greg Kurz wrote:
> On Mon, 26 Oct 2020 14:49:34 +0100
> Philippe Mathieu-Daudé <philmd@redhat.com> wrote:
> 
>> On 10/26/20 1:40 PM, Greg Kurz wrote:
>>> spapr_reallocate_hpt() has three users, two of which pass &error_fatal
>>> and the third one, htab_load(), passes &local_err, uses it to detect
>>> failures and simply propagates -EINVAL up to vmstate_load(), which will
>>> cause QEMU to exit. It is thus confusing that spapr_reallocate_hpt()
>>> doesn't return right away when an error is detected in some cases. Also,
>>> the comment suggesting that the caller is welcome to try to carry on
>>> seems like a remnant in this respect.
>>>
>>> This can be improved:
>>> - change spapr_reallocate_hpt() to always report a negative errno on
>>>    failure, either as reported by KVM or -ENOSPC if the HPT is smaller
>>>    than what was asked,
>>> - use that to detect failures in htab_load() which is preferred over
>>>    checking &local_err,
>>> - propagate this negative errno to vmstate_load() because it is more
>>>    accurate than propagating -EINVAL for all possible errors.
>>>
>>> Signed-off-by: Greg Kurz <groug@kaod.org>
...

>>>       if (rc < 0) {
>>> @@ -1504,8 +1503,7 @@ void spapr_reallocate_hpt(SpaprMachineState *spapr, int shift,
>>>           error_setg_errno(errp, errno, "Failed to allocate KVM HPT of order %d",
>>>                            shift);
>>>           error_append_hint(errp, "Try smaller maxmem?\n");
>>> -        /* This is almost certainly fatal, but if the caller really
>>> -         * wants to carry on with shift == 0, it's welcome to try */
>>> +        return -errno;
>>
>> Maybe returning here should be in a previous patch.
>> Otherwise patch looks good.
>>
> 
> It could have been indeed...
> 
>>>       } else if (rc > 0) {
>>>           /* kernel-side HPT allocated */
>>>           if (rc != shift) {
>>> @@ -1513,6 +1511,7 @@ void spapr_reallocate_hpt(SpaprMachineState *spapr, int shift,
>>>                          "Requested order %d HPT, but kernel allocated order %ld",
>>>                          shift, rc);
>>>               error_append_hint(errp, "Try smaller maxmem?\n");
>>> +            return -ENOSPC;
> 
> ... along with this one.
> 
> I didn't go this way because it doesn't really affect the final behavior since
> QEMU exits in all cases. It's mostly about propagating an appropriate errno up
> to VMState in the case of htab_load(). But if you find it clearer and I need
> to post a v2, I can certainly do that.

As a reviewer I prefer dumb obvious patches I can quickly understand.
If I stop, spend too long, am not sure, I spend time to ask, and usually
stop reviewing the series. Then you spend time to answer, eventually
respin. In doubt, KISS.

Patch is queued so no need for v2.
diff mbox series

Patch

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index ff7de7da2875..12a012d9dd09 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1483,8 +1483,7 @@  void spapr_free_hpt(SpaprMachineState *spapr)
     close_htab_fd(spapr);
 }
 
-void spapr_reallocate_hpt(SpaprMachineState *spapr, int shift,
-                          Error **errp)
+int spapr_reallocate_hpt(SpaprMachineState *spapr, int shift, Error **errp)
 {
     ERRP_GUARD();
     long rc;
@@ -1496,7 +1495,7 @@  void spapr_reallocate_hpt(SpaprMachineState *spapr, int shift,
 
     if (rc == -EOPNOTSUPP) {
         error_setg(errp, "HPT not supported in nested guests");
-        return;
+        return -EOPNOTSUPP;
     }
 
     if (rc < 0) {
@@ -1504,8 +1503,7 @@  void spapr_reallocate_hpt(SpaprMachineState *spapr, int shift,
         error_setg_errno(errp, errno, "Failed to allocate KVM HPT of order %d",
                          shift);
         error_append_hint(errp, "Try smaller maxmem?\n");
-        /* This is almost certainly fatal, but if the caller really
-         * wants to carry on with shift == 0, it's welcome to try */
+        return -errno;
     } else if (rc > 0) {
         /* kernel-side HPT allocated */
         if (rc != shift) {
@@ -1513,6 +1511,7 @@  void spapr_reallocate_hpt(SpaprMachineState *spapr, int shift,
                        "Requested order %d HPT, but kernel allocated order %ld",
                        shift, rc);
             error_append_hint(errp, "Try smaller maxmem?\n");
+            return -ENOSPC;
         }
 
         spapr->htab_shift = shift;
@@ -1533,6 +1532,7 @@  void spapr_reallocate_hpt(SpaprMachineState *spapr, int shift,
     /* We're setting up a hash table, so that means we're not radix */
     spapr->patb_entry = 0;
     spapr_set_all_lpcrs(0, LPCR_HR | LPCR_UPRT);
+    return 0;
 }
 
 void spapr_setup_hpt(SpaprMachineState *spapr)
@@ -2286,11 +2286,13 @@  static int htab_load(QEMUFile *f, void *opaque, int version_id)
     }
 
     if (section_hdr) {
+        int ret;
+
         /* First section gives the htab size */
-        spapr_reallocate_hpt(spapr, section_hdr, &local_err);
-        if (local_err) {
+        ret = spapr_reallocate_hpt(spapr, section_hdr, &local_err);
+        if (ret < 0) {
             error_report_err(local_err);
-            return -EINVAL;
+            return ret;
         }
         return 0;
     }
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index bb47896f173b..2e89e36cfbdc 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -846,8 +846,7 @@  void spapr_hotplug_req_add_by_count_indexed(SpaprDrcType drc_type,
 void spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type,
                                                uint32_t count, uint32_t index);
 int spapr_hpt_shift_for_ramsize(uint64_t ramsize);
-void spapr_reallocate_hpt(SpaprMachineState *spapr, int shift,
-                          Error **errp);
+int spapr_reallocate_hpt(SpaprMachineState *spapr, int shift, Error **errp);
 void spapr_clear_pending_events(SpaprMachineState *spapr);
 void spapr_clear_pending_hotplug_events(SpaprMachineState *spapr);
 int spapr_max_server_number(SpaprMachineState *spapr);