diff mbox series

[2/4] smbus: Fix spd_data_generate() error API violation

Message ID 20200420132826.8879-3-armbru@redhat.com
State New
Headers show
Series Subject: [PATCH 0/4] smbus: SPD fixes | expand

Commit Message

Markus Armbruster April 20, 2020, 1:28 p.m. UTC
The Error ** argument must be NULL, &error_abort, &error_fatal, or a
pointer to a variable containing NULL.  Passing an argument of the
latter kind twice without clearing it in between is wrong: if the
first call sets an error, it no longer points to NULL for the second
call.

spd_data_generate() can pass @errp to error_setg() more than once when
it adjusts both memory size and type.  Harmless, because no caller
passes anything that needs adjusting.  Until the previous commit,
sam460ex passed types that needed adjusting, but not sizes.

spd_data_generate()'s contract is rather awkward:

    If everything's fine, return non-null and don't set an error.

    Else, if memory size or type need adjusting, return non-null and
    set an error describing the adjustment.

    Else, return null and set an error reporting why no data can be
    generated.

Its callers treat the error as a warning even when null is returned.
They don't create the "smbus-eeprom" device then.  Suspicious.

Since the previous commit, only "everything's fine" can actually
happen.  Drop the unused code and simplify the callers.  This gets rid
of the error API violation.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
---
 include/hw/i2c/smbus_eeprom.h |  2 +-
 hw/i2c/smbus_eeprom.c         | 30 ++++--------------------------
 hw/mips/mips_fulong2e.c       | 10 ++--------
 hw/ppc/sam460ex.c             | 12 +++---------
 4 files changed, 10 insertions(+), 44 deletions(-)

Comments

BALATON Zoltan April 20, 2020, 2:20 p.m. UTC | #1
On Mon, 20 Apr 2020, Markus Armbruster wrote:
> The Error ** argument must be NULL, &error_abort, &error_fatal, or a
> pointer to a variable containing NULL.  Passing an argument of the
> latter kind twice without clearing it in between is wrong: if the
> first call sets an error, it no longer points to NULL for the second
> call.
>
> spd_data_generate() can pass @errp to error_setg() more than once when
> it adjusts both memory size and type.  Harmless, because no caller
> passes anything that needs adjusting.  Until the previous commit,
> sam460ex passed types that needed adjusting, but not sizes.
>
> spd_data_generate()'s contract is rather awkward:
>
>    If everything's fine, return non-null and don't set an error.
>
>    Else, if memory size or type need adjusting, return non-null and
>    set an error describing the adjustment.
>
>    Else, return null and set an error reporting why no data can be
>    generated.
>
> Its callers treat the error as a warning even when null is returned.
> They don't create the "smbus-eeprom" device then.  Suspicious.

The idea here again is to make it work if there's a way it could work 
rather than throw back an error to user and bail. This is for user 
convenience in the likely case the user knows nothing about the board or 
SPD data restrictions. You seem to disagree with this and want to remove 
all such logic from everywhere. Despite the title of this patch it's not 
just fixing error API usage but removing those fix ups.

If Error cannot be used for this could you change error_setg to 
warn_report and keep the fix ups untouched? That fixes the problem with 
error (although leaves no chance to board code to handle errors). Maybe 
fix Error to be usable for what it's intended for rather than removing 
cases it can't handle.

Regards,
BALATON Zoltan

> Since the previous commit, only "everything's fine" can actually
> happen.  Drop the unused code and simplify the callers.  This gets rid
> of the error API violation.
>
> Signed-off-by: Markus Armbruster <armbru@redhat.com>
> ---
> include/hw/i2c/smbus_eeprom.h |  2 +-
> hw/i2c/smbus_eeprom.c         | 30 ++++--------------------------
> hw/mips/mips_fulong2e.c       | 10 ++--------
> hw/ppc/sam460ex.c             | 12 +++---------
> 4 files changed, 10 insertions(+), 44 deletions(-)
>
> diff --git a/include/hw/i2c/smbus_eeprom.h b/include/hw/i2c/smbus_eeprom.h
> index 15e2151b50..68b0063ab6 100644
> --- a/include/hw/i2c/smbus_eeprom.h
> +++ b/include/hw/i2c/smbus_eeprom.h
> @@ -31,6 +31,6 @@ void smbus_eeprom_init(I2CBus *bus, int nb_eeprom,
>                        const uint8_t *eeprom_spd, int size);
>
> enum sdram_type { SDR = 0x4, DDR = 0x7, DDR2 = 0x8 };
> -uint8_t *spd_data_generate(enum sdram_type type, ram_addr_t size, Error **errp);
> +uint8_t *spd_data_generate(enum sdram_type type, ram_addr_t size);
>
> #endif
> diff --git a/hw/i2c/smbus_eeprom.c b/hw/i2c/smbus_eeprom.c
> index 5adf3b15b5..07fbbf87f1 100644
> --- a/hw/i2c/smbus_eeprom.c
> +++ b/hw/i2c/smbus_eeprom.c
> @@ -195,8 +195,7 @@ void smbus_eeprom_init(I2CBus *smbus, int nb_eeprom,
> }
>
> /* Generate SDRAM SPD EEPROM data describing a module of type and size */
> -uint8_t *spd_data_generate(enum sdram_type type, ram_addr_t ram_size,
> -                           Error **errp)
> +uint8_t *spd_data_generate(enum sdram_type type, ram_addr_t ram_size)
> {
>     uint8_t *spd;
>     uint8_t nbanks;
> @@ -222,29 +221,10 @@ uint8_t *spd_data_generate(enum sdram_type type, ram_addr_t ram_size,
>         g_assert_not_reached();
>     }
>     size = ram_size >> 20; /* work in terms of megabytes */
> -    if (size < 4) {
> -        error_setg(errp, "SDRAM size is too small");
> -        return NULL;
> -    }
>     sz_log2 = 31 - clz32(size);
>     size = 1U << sz_log2;
> -    if (ram_size > size * MiB) {
> -        error_setg(errp, "SDRAM size 0x"RAM_ADDR_FMT" is not a power of 2, "
> -                   "truncating to %u MB", ram_size, size);
> -    }
> -    if (sz_log2 < min_log2) {
> -        error_setg(errp,
> -                   "Memory size is too small for SDRAM type, adjusting type");
> -        if (size >= 32) {
> -            type = DDR;
> -            min_log2 = 5;
> -            max_log2 = 12;
> -        } else {
> -            type = SDR;
> -            min_log2 = 2;
> -            max_log2 = 9;
> -        }
> -    }
> +    assert(ram_size == size * MiB);
> +    assert(sz_log2 >= min_log2);
>
>     nbanks = 1;
>     while (sz_log2 > max_log2 && nbanks < 8) {
> @@ -252,9 +232,7 @@ uint8_t *spd_data_generate(enum sdram_type type, ram_addr_t ram_size,
>         nbanks++;
>     }
>
> -    if (size > (1ULL << sz_log2) * nbanks) {
> -        error_setg(errp, "Memory size is too big for SDRAM, truncating");
> -    }
> +    assert(size == (1ULL << sz_log2) * nbanks);
>
>     /* split to 2 banks if possible to avoid a bug in MIPS Malta firmware */
>     if (nbanks == 1 && sz_log2 > min_log2) {
> diff --git a/hw/mips/mips_fulong2e.c b/hw/mips/mips_fulong2e.c
> index 5040afd581..ef02d54b33 100644
> --- a/hw/mips/mips_fulong2e.c
> +++ b/hw/mips/mips_fulong2e.c
> @@ -297,7 +297,6 @@ static void mips_fulong2e_init(MachineState *machine)
>     MemoryRegion *bios = g_new(MemoryRegion, 1);
>     long bios_size;
>     uint8_t *spd_data;
> -    Error *err = NULL;
>     int64_t kernel_entry;
>     PCIBus *pci_bus;
>     ISABus *isa_bus;
> @@ -377,13 +376,8 @@ static void mips_fulong2e_init(MachineState *machine)
>     }
>
>     /* Populate SPD eeprom data */
> -    spd_data = spd_data_generate(DDR, machine->ram_size, &err);
> -    if (err) {
> -        warn_report_err(err);
> -    }
> -    if (spd_data) {
> -        smbus_eeprom_init_one(smbus, 0x50, spd_data);
> -    }
> +    spd_data = spd_data_generate(DDR, machine->ram_size);
> +    smbus_eeprom_init_one(smbus, 0x50, spd_data);
>
>     mc146818_rtc_init(isa_bus, 2000, NULL);
>
> diff --git a/hw/ppc/sam460ex.c b/hw/ppc/sam460ex.c
> index 856bc0b5a3..029fb6191a 100644
> --- a/hw/ppc/sam460ex.c
> +++ b/hw/ppc/sam460ex.c
> @@ -292,7 +292,6 @@ static void sam460ex_init(MachineState *machine)
>     SysBusDevice *sbdev;
>     struct boot_info *boot_info;
>     uint8_t *spd_data;
> -    Error *err = NULL;
>     int success;
>
>     cpu = POWERPC_CPU(cpu_create(machine->cpu_type));
> @@ -336,14 +335,9 @@ static void sam460ex_init(MachineState *machine)
>     i2c = PPC4xx_I2C(dev)->bus;
>     /* SPD EEPROM on RAM module */
>     spd_data = spd_data_generate(ram_sizes[0] < 256 * MiB ? SDR : DDR2,
> -                                 ram_sizes[0], &err);
> -    if (err) {
> -        warn_report_err(err);
> -    }
> -    if (spd_data) {
> -        spd_data[20] = 4; /* SO-DIMM module */
> -        smbus_eeprom_init_one(i2c, 0x50, spd_data);
> -    }
> +                                 ram_sizes[0]);
> +    spd_data[20] = 4; /* SO-DIMM module */
> +    smbus_eeprom_init_one(i2c, 0x50, spd_data);
>     /* RTC */
>     i2c_create_slave(i2c, "m41t80", 0x68);
>
>
Markus Armbruster April 21, 2020, 5:28 a.m. UTC | #2
BALATON Zoltan <balaton@eik.bme.hu> writes:

> On Mon, 20 Apr 2020, Markus Armbruster wrote:
>> The Error ** argument must be NULL, &error_abort, &error_fatal, or a
>> pointer to a variable containing NULL.  Passing an argument of the
>> latter kind twice without clearing it in between is wrong: if the
>> first call sets an error, it no longer points to NULL for the second
>> call.
>>
>> spd_data_generate() can pass @errp to error_setg() more than once when
>> it adjusts both memory size and type.  Harmless, because no caller
>> passes anything that needs adjusting.  Until the previous commit,
>> sam460ex passed types that needed adjusting, but not sizes.
>>
>> spd_data_generate()'s contract is rather awkward:
>>
>>    If everything's fine, return non-null and don't set an error.
>>
>>    Else, if memory size or type need adjusting, return non-null and
>>    set an error describing the adjustment.
>>
>>    Else, return null and set an error reporting why no data can be
>>    generated.
>>
>> Its callers treat the error as a warning even when null is returned.
>> They don't create the "smbus-eeprom" device then.  Suspicious.
>
> The idea here again is to make it work if there's a way it could work
> rather than throw back an error to user and bail. This is for user
> convenience in the likely case the user knows nothing about the board
> or SPD data restrictions.

We're in perfect agreement that the user of QEMU should not need to know
anything about memory type and number of banks.  QEMU should pick a
sensible configuration for any memory size it supports.

>                           You seem to disagree with this

Here's what I actually disagree with:

1. QEMU warning the user about its choice of memory type, but only
sometimes.  Why warn?  There is nothing wrong, and there is nothing the
user can do to avoid the condition that triggers the warning.

2. QEMU changing the user's memory size.  Yes, I understand that's an
attempt to be helpful, but I prefer my tools not to second-guess my
intent.

>                                                          and want to
> remove all such logic from everywhere. Despite the title of this patch
> it's not just fixing error API usage but removing those fix ups.

I'm removing unused code that is also broken.  If you want to keep it,
please fix it, and provide a user and/or a unit test.  Without that, it
is a trap for future callers.

> If Error cannot be used for this could you change error_setg to
> warn_report and keep the fix ups untouched? That fixes the problem
> with error (although leaves no chance to board code to handle
> errors). Maybe fix Error to be usable for what it's intended for
> rather than removing cases it can't handle.

Error is designed for errors, not warnings.

A function either succeeds (no error) or fails (one error).

It can be pressed into service for warnings (you did, although in a
buggy way).  You still can return at most one Error per Error **
parameter.  If you need multiple warnings, use multiple parameters
(ugh!).  You could also try to squash multiple warnings into one the
hints mechanism.

I'd advise against combining warn_report() and Error ** in one function.
A function should either take care of talking to the user completely, or
leave it to its caller completely.
BALATON Zoltan April 22, 2020, 1:43 p.m. UTC | #3
On Tue, 21 Apr 2020, Markus Armbruster wrote:
> BALATON Zoltan <balaton@eik.bme.hu> writes:
>> On Mon, 20 Apr 2020, Markus Armbruster wrote:
>>> The Error ** argument must be NULL, &error_abort, &error_fatal, or a
>>> pointer to a variable containing NULL.  Passing an argument of the
>>> latter kind twice without clearing it in between is wrong: if the
>>> first call sets an error, it no longer points to NULL for the second
>>> call.
>>>
>>> spd_data_generate() can pass @errp to error_setg() more than once when
>>> it adjusts both memory size and type.  Harmless, because no caller
>>> passes anything that needs adjusting.  Until the previous commit,
>>> sam460ex passed types that needed adjusting, but not sizes.
>>>
>>> spd_data_generate()'s contract is rather awkward:
>>>
>>>    If everything's fine, return non-null and don't set an error.
>>>
>>>    Else, if memory size or type need adjusting, return non-null and
>>>    set an error describing the adjustment.
>>>
>>>    Else, return null and set an error reporting why no data can be
>>>    generated.
>>>
>>> Its callers treat the error as a warning even when null is returned.
>>> They don't create the "smbus-eeprom" device then.  Suspicious.
>>
>> The idea here again is to make it work if there's a way it could work
>> rather than throw back an error to user and bail. This is for user
>> convenience in the likely case the user knows nothing about the board
>> or SPD data restrictions.
>
> We're in perfect agreement that the user of QEMU should not need to know
> anything about memory type and number of banks.  QEMU should pick a
> sensible configuration for any memory size it supports.

I though it could be useful to warn the users when QEMU had to fix up some 
values to notify them that what they get may not be what they expect and 
can then know why. If the message really annoys you you can remove it but 
I think it can be useful. I just think your real problem with it is that 
Error can't support it so it's easier to remove the warning than fixing 
Error or use warn_report instead.

>>                           You seem to disagree with this
>
> Here's what I actually disagree with:
>
> 1. QEMU warning the user about its choice of memory type, but only
> sometimes.  Why warn?  There is nothing wrong, and there is nothing the
> user can do to avoid the condition that triggers the warning.

The memory size that triggers the warning is specified by the user so the 
user can do someting about it.

> 2. QEMU changing the user's memory size.  Yes, I understand that's an
> attempt to be helpful, but I prefer my tools not to second-guess my
> intent.

I agree with that and also hate Windows's habit of trying to be more 
intelligent than the user and prefer the Unix way however the average 
users of QEMU (from my perpective, who wants to run Amiga like OSes 
typically on Windows and for the most part not knowing what they are 
doing) are already intimidated by the messy QEMU command line interface 
and will specify random values and complain or go away if it does not work 
so making their life a little easier is not useless. These users (or any 
CLI users) are apparently not relevant from your point of view but they do 
exist and I think should be supported better.

>>                                                          and want to
>> remove all such logic from everywhere. Despite the title of this patch
>> it's not just fixing error API usage but removing those fix ups.
>
> I'm removing unused code that is also broken.  If you want to keep it,
> please fix it, and provide a user and/or a unit test.  Without that, it
> is a trap for future callers.

What was broken in this patch? Isn't freeing the previous warning when 
adding a new one or combining them as you say below (although I don't 
really get what you mean) would fix it without removing fix ups? It's only 
unused now because in previous patches you've removed all usages: first 
broke memory fixup because of some internal QEMU API did not support 
fixing up memory size so instead of fixing that API removed what did not 
fit, then now removing type fix ups because it's not fitting Error API.

Originally it did work well and produced usable values for whatever number 
the user specified and it was convenient for users. (Especially the 
sam460ex is a problematic case because of SoC and firmware limits that the 
user should not need to know about.)

>> If Error cannot be used for this could you change error_setg to
>> warn_report and keep the fix ups untouched? That fixes the problem
>> with error (although leaves no chance to board code to handle
>> errors). Maybe fix Error to be usable for what it's intended for
>> rather than removing cases it can't handle.
>
> Error is designed for errors, not warnings.
>
> A function either succeeds (no error) or fails (one error).
>
> It can be pressed into service for warnings (you did, although in a
> buggy way).  You still can return at most one Error per Error **
> parameter.  If you need multiple warnings, use multiple parameters
> (ugh!).  You could also try to squash multiple warnings into one the
> hints mechanism.
>
> I'd advise against combining warn_report() and Error ** in one function.
> A function should either take care of talking to the user completely, or
> leave it to its caller completely.

I've tried to use error so the board code can decide what's an error and 
what's a warning but if this cannot be done with this QEMU facility I 
don't know a better way than using warn_report for warnings.

Regards,
BALATON Zoltan
Markus Armbruster April 24, 2020, 9:45 a.m. UTC | #4
BALATON Zoltan <balaton@eik.bme.hu> writes:

> On Tue, 21 Apr 2020, Markus Armbruster wrote:
>> BALATON Zoltan <balaton@eik.bme.hu> writes:
>>> On Mon, 20 Apr 2020, Markus Armbruster wrote:
>>>> The Error ** argument must be NULL, &error_abort, &error_fatal, or a
>>>> pointer to a variable containing NULL.  Passing an argument of the
>>>> latter kind twice without clearing it in between is wrong: if the
>>>> first call sets an error, it no longer points to NULL for the second
>>>> call.
>>>>
>>>> spd_data_generate() can pass @errp to error_setg() more than once when
>>>> it adjusts both memory size and type.  Harmless, because no caller
>>>> passes anything that needs adjusting.  Until the previous commit,
>>>> sam460ex passed types that needed adjusting, but not sizes.
>>>>
>>>> spd_data_generate()'s contract is rather awkward:
>>>>
>>>>    If everything's fine, return non-null and don't set an error.
>>>>
>>>>    Else, if memory size or type need adjusting, return non-null and
>>>>    set an error describing the adjustment.
>>>>
>>>>    Else, return null and set an error reporting why no data can be
>>>>    generated.
>>>>
>>>> Its callers treat the error as a warning even when null is returned.
>>>> They don't create the "smbus-eeprom" device then.  Suspicious.
>>>
>>> The idea here again is to make it work if there's a way it could work
>>> rather than throw back an error to user and bail. This is for user
>>> convenience in the likely case the user knows nothing about the board
>>> or SPD data restrictions.
>>
>> We're in perfect agreement that the user of QEMU should not need to know
>> anything about memory type and number of banks.  QEMU should pick a
>> sensible configuration for any memory size it supports.
>
> I though it could be useful to warn the users when QEMU had to fix up
> some values to notify them that what they get may not be what they
> expect and can then know why.

*If* QEMU "fixed up" the user's configuration, then QEMU should indeed
warn the user.

But it doesn't fix up anything here.  This broken code is unused.

>                               If the message really annoys you you can
> remove it but I think it can be useful. I just think your real problem
> with it is that Error can't support it so it's easier to remove the
> warning than fixing Error or use warn_report instead.

It's indeed easier to remove broken unused code than to try fixing it.
YAGNI.

>>>                           You seem to disagree with this
>>
>> Here's what I actually disagree with:
>>
>> 1. QEMU warning the user about its choice of memory type, but only
>> sometimes.  Why warn?  There is nothing wrong, and there is nothing the
>> user can do to avoid the condition that triggers the warning.
>
> The memory size that triggers the warning is specified by the user so
> the user can do someting about it.

There is no way to trigger the warning.  If we dropped PATCH 1 instead
of fixing it as I did in v2, then the only way to trigger the warning is
-M sam460ex -m 64 or -m 32, and the only way to avoid it is to don't do
that.

Why would a user care whether he gets DDR or DDR2 memory?

>> 2. QEMU changing the user's memory size.  Yes, I understand that's an
>> attempt to be helpful, but I prefer my tools not to second-guess my
>> intent.
>
> I agree with that and also hate Windows's habit of trying to be more
> intelligent than the user and prefer the Unix way however the average
> users of QEMU (from my perpective, who wants to run Amiga like OSes
> typically on Windows and for the most part not knowing what they are
> doing) are already intimidated by the messy QEMU command line
> interface and will specify random values and complain or go away if it
> does not work so making their life a little easier is not
> useless. These users (or any CLI users) are apparently not relevant
> from your point of view but they do exist and I think should be
> supported better.

This theoretical.  Remember, we're talking about unused code.  Proof:

    $ ppc-softmmu/qemu-system-ppc -M sam460ex -m 4096
    qemu-system-ppc: Max 1 banks of 2048 ,1024 ,512 ,256 ,128 ,64 ,32 MB DIMM/bank supported
    qemu-system-ppc: Possible valid RAM size: 2048

I figure commit a0258e4afa "ppc/{ppc440_bamboo, sam460ex}: drop RAM size
fixup" removed the only uses.  If you disagree with it, take it up with
Igor, please.

>>>                                                          and want to
>>> remove all such logic from everywhere. Despite the title of this patch
>>> it's not just fixing error API usage but removing those fix ups.
>>
>> I'm removing unused code that is also broken.  If you want to keep it,
>> please fix it, and provide a user and/or a unit test.  Without that, it
>> is a trap for future callers.
>
> What was broken in this patch? Isn't freeing the previous warning when

My commit message explains:

     The Error ** argument must be NULL, &error_abort, &error_fatal, or a
     pointer to a variable containing NULL.  Passing an argument of the
     latter kind twice without clearing it in between is wrong: if the
     first call sets an error, it no longer points to NULL for the second
     call.

     spd_data_generate() can pass @errp to error_setg() more than once when
     it adjusts both memory size and type.  Harmless, because no caller
     passes anything that needs adjusting.  Until the previous commit,
     sam460ex passed types that needed adjusting, but not sizes.

Now have a look at the code I delete:

    if (size < 4) {
        error_setg(errp, "SDRAM size is too small");
        return NULL;
    }
    sz_log2 = 31 - clz32(size);
    size = 1U << sz_log2;
    if (ram_size > size * MiB) {
--->    error_setg(errp, "SDRAM size 0x"RAM_ADDR_FMT" is not a power of 2, "
                   "truncating to %u MB", ram_size, size);
    }
    if (sz_log2 < min_log2) {
--->    error_setg(errp,
                   "Memory size is too small for SDRAM type, adjusting type");
        if (size >= 32) {
            type = DDR;
            min_log2 = 5;
            max_log2 = 12;
        } else {
            type = SDR;
            min_log2 = 2;
            max_log2 = 9;
        }
    }
    [...]
    if (size > (1ULL << sz_log2) * nbanks) {
--->    error_setg(errp, "Memory size is too big for SDRAM, truncating");
    }

If more than one of the conditions guarding these error_setg() is true,
and @errp is neither null, &error_abort, nor &error_fatal, then it'll
point to an Error * that is not null for the non-first error_setg()
call.  This will trip the assertion in error_setv().

> adding a new one or combining them as you say below (although I don't
> really get what you mean) would fix it without removing fix ups? It's
> only unused now because in previous patches you've removed all usages:
> first broke memory fixup because of some internal QEMU API did not
> support fixing up memory size so instead of fixing that API removed
> what did not fit, then now removing type fix ups because it's not
> fitting Error API.
>
> Originally it did work well and produced usable values for whatever
> number the user specified and it was convenient for users. (Especially
> the sam460ex is a problematic case because of SoC and firmware limits
> that the user should not need to know about.)

I don't doubt it worked in your testing.

It's still wrong.

>>> If Error cannot be used for this could you change error_setg to
>>> warn_report and keep the fix ups untouched? That fixes the problem
>>> with error (although leaves no chance to board code to handle
>>> errors). Maybe fix Error to be usable for what it's intended for
>>> rather than removing cases it can't handle.
>>
>> Error is designed for errors, not warnings.
>>
>> A function either succeeds (no error) or fails (one error).
>>
>> It can be pressed into service for warnings (you did, although in a
>> buggy way).  You still can return at most one Error per Error **
>> parameter.  If you need multiple warnings, use multiple parameters
>> (ugh!).  You could also try to squash multiple warnings into one the
>> hints mechanism.
>>
>> I'd advise against combining warn_report() and Error ** in one function.
>> A function should either take care of talking to the user completely, or
>> leave it to its caller completely.
>
> I've tried to use error so the board code can decide what's an error
> and what's a warning but if this cannot be done with this QEMU
> facility I don't know a better way than using warn_report for
> warnings.

Is there any board that genuinely wants to decide what's an error and
what's a warning?

Here's spd_data_generate() contract again:

        If everything's fine, return non-null and don't set an error.

        Else, if memory size or type need adjusting, return non-null and
        set an error describing the adjustment.

        Else, return null and set an error reporting why no data can be
        generated.

It has a grand total of two users.  Both treat the second case as
warning, and the third as error.

Until you have a user that wants to handle things differently, you're
overcomplicating things.

YAGNI!
Philippe Mathieu-Daudé April 24, 2020, 10:18 a.m. UTC | #5
On 4/24/20 11:45 AM, Markus Armbruster wrote:
> BALATON Zoltan <balaton@eik.bme.hu> writes:
>> On Tue, 21 Apr 2020, Markus Armbruster wrote:
[...]
>>>
>>> Here's what I actually disagree with:
>>>
>>> 1. QEMU warning the user about its choice of memory type, but only
>>> sometimes.  Why warn?  There is nothing wrong, and there is nothing the
>>> user can do to avoid the condition that triggers the warning.
>>
>> The memory size that triggers the warning is specified by the user so
>> the user can do someting about it.
> 
> There is no way to trigger the warning.  If we dropped PATCH 1 instead
> of fixing it as I did in v2, then the only way to trigger the warning is
> -M sam460ex -m 64 or -m 32, and the only way to avoid it is to don't do
> that.
> 
> Why would a user care whether he gets DDR or DDR2 memory?

To use a different firmware code path!
Markus Armbruster April 24, 2020, 11:23 a.m. UTC | #6
Philippe Mathieu-Daudé <philmd@redhat.com> writes:

> On 4/24/20 11:45 AM, Markus Armbruster wrote:
>> BALATON Zoltan <balaton@eik.bme.hu> writes:
>>> On Tue, 21 Apr 2020, Markus Armbruster wrote:
> [...]
>>>>
>>>> Here's what I actually disagree with:
>>>>
>>>> 1. QEMU warning the user about its choice of memory type, but only
>>>> sometimes.  Why warn?  There is nothing wrong, and there is nothing the
>>>> user can do to avoid the condition that triggers the warning.
>>>
>>> The memory size that triggers the warning is specified by the user so
>>> the user can do someting about it.
>>
>> There is no way to trigger the warning.  If we dropped PATCH 1 instead
>> of fixing it as I did in v2, then the only way to trigger the warning is
>> -M sam460ex -m 64 or -m 32, and the only way to avoid it is to don't do
>> that.
>>
>> Why would a user care whether he gets DDR or DDR2 memory?
>
> To use a different firmware code path!

Let's see how that works out for users.

Assume machine foobar needs a non-default firmware for "small" memory
sizes, such as -m 64.

Alice doesn't know.  She starts QEMU like this:

    $ qemu-system-foo -M foobar -m 64

It hangs early in boot.  Not good.

Except the warning from spd_data_generate() rides to her rescue!

    qemu-system-foo: warning: Memory size is too small for SDRAM type, adjusting type

Since Alice is rather sharp, the warning makes her realize immediately
that she needs to use different firmware, like this:

    $ qemu-system-foo -M foobar -m 64 -bios roms/foobar/firmware-small.bin

Okay, I'm done telling my fairy tale.

If you want helpful, make it work out of the box: default to the
firmware that actually works, don't make users guess which one they
need.

But one more time: this is all theoretical.  We're talking about unused,
broken code.  If you want to keep it, please add users, and fix it up.
BALATON Zoltan April 24, 2020, 1:52 p.m. UTC | #7
On Fri, 24 Apr 2020, Markus Armbruster wrote:
> BALATON Zoltan <balaton@eik.bme.hu> writes:
>> On Tue, 21 Apr 2020, Markus Armbruster wrote:
>>> BALATON Zoltan <balaton@eik.bme.hu> writes:
>>>> On Mon, 20 Apr 2020, Markus Armbruster wrote:
>>>>> The Error ** argument must be NULL, &error_abort, &error_fatal, or a
>>>>> pointer to a variable containing NULL.  Passing an argument of the
>>>>> latter kind twice without clearing it in between is wrong: if the
>>>>> first call sets an error, it no longer points to NULL for the second
>>>>> call.
>>>>>
>>>>> spd_data_generate() can pass @errp to error_setg() more than once when
>>>>> it adjusts both memory size and type.  Harmless, because no caller
>>>>> passes anything that needs adjusting.  Until the previous commit,
>>>>> sam460ex passed types that needed adjusting, but not sizes.
>>>>>
>>>>> spd_data_generate()'s contract is rather awkward:
>>>>>
>>>>>    If everything's fine, return non-null and don't set an error.
>>>>>
>>>>>    Else, if memory size or type need adjusting, return non-null and
>>>>>    set an error describing the adjustment.
>>>>>
>>>>>    Else, return null and set an error reporting why no data can be
>>>>>    generated.
>>>>>
>>>>> Its callers treat the error as a warning even when null is returned.
>>>>> They don't create the "smbus-eeprom" device then.  Suspicious.
>>>>
>>>> The idea here again is to make it work if there's a way it could work
>>>> rather than throw back an error to user and bail. This is for user
>>>> convenience in the likely case the user knows nothing about the board
>>>> or SPD data restrictions.
>>>
>>> We're in perfect agreement that the user of QEMU should not need to know
>>> anything about memory type and number of banks.  QEMU should pick a
>>> sensible configuration for any memory size it supports.
>>
>> I though it could be useful to warn the users when QEMU had to fix up
>> some values to notify them that what they get may not be what they
>> expect and can then know why.
>
> *If* QEMU "fixed up" the user's configuration, then QEMU should indeed
> warn the user.
>
> But it doesn't fix up anything here.  This broken code is unused.
>
>>                               If the message really annoys you you can
>> remove it but I think it can be useful. I just think your real problem
>> with it is that Error can't support it so it's easier to remove the
>> warning than fixing Error or use warn_report instead.
>
> It's indeed easier to remove broken unused code than to try fixing it.
> YAGNI.
>
>>>>                           You seem to disagree with this
>>>
>>> Here's what I actually disagree with:
>>>
>>> 1. QEMU warning the user about its choice of memory type, but only
>>> sometimes.  Why warn?  There is nothing wrong, and there is nothing the
>>> user can do to avoid the condition that triggers the warning.
>>
>> The memory size that triggers the warning is specified by the user so
>> the user can do someting about it.
>
> There is no way to trigger the warning.  If we dropped PATCH 1 instead
> of fixing it as I did in v2, then the only way to trigger the warning is
> -M sam460ex -m 64 or -m 32, and the only way to avoid it is to don't do
> that.
>
> Why would a user care whether he gets DDR or DDR2 memory?
>
>>> 2. QEMU changing the user's memory size.  Yes, I understand that's an
>>> attempt to be helpful, but I prefer my tools not to second-guess my
>>> intent.
>>
>> I agree with that and also hate Windows's habit of trying to be more
>> intelligent than the user and prefer the Unix way however the average
>> users of QEMU (from my perpective, who wants to run Amiga like OSes
>> typically on Windows and for the most part not knowing what they are
>> doing) are already intimidated by the messy QEMU command line
>> interface and will specify random values and complain or go away if it
>> does not work so making their life a little easier is not
>> useless. These users (or any CLI users) are apparently not relevant
>> from your point of view but they do exist and I think should be
>> supported better.
>
> This theoretical.  Remember, we're talking about unused code.  Proof:
>
>    $ ppc-softmmu/qemu-system-ppc -M sam460ex -m 4096
>    qemu-system-ppc: Max 1 banks of 2048 ,1024 ,512 ,256 ,128 ,64 ,32 MB DIMM/bank supported
>    qemu-system-ppc: Possible valid RAM size: 2048
>
> I figure commit a0258e4afa "ppc/{ppc440_bamboo, sam460ex}: drop RAM size
> fixup" removed the only uses.  If you disagree with it, take it up with
> Igor, please.

I did raise similar complaints at that patch series and proposed several 
alternatives to preserve the previous functionality (sam460ex wasn't the 
only board that fixed up memory size for users) but since current APIs 
don't support that and adding this extra feature for just this machine 
wasn't a priority, my comments were accepted and ignored and I did not 
feel it would be fair to hold back the whole series for this.

I think I've explained how it worked and how it should work in my opinion 
and hope this could be revived at some point when the QEMU CLI will be 
made more user friendly (if ever) but I'm not willing to fight for that 
now. Silence in this case does not mean agreement but I see no point 
arguing if you cannot be convinced.

Just one point I'd like to keep is that no matter how you fix it the board 
code should be the only one that decides if the error is recoverable or 
not so don't abort or assert from this helper but return error to board 
code and exit from there so it has a chnace to recover in the furure or 
add "magic function" to help users wihtout having to touch this helper 
again. When you're touching the helper it would be nice to also try to 
convert the remaining two machines with hard coded SPD data so the final 
function is truely covering all current users of the function.

Regards,
BALATON Zoltan
Markus Armbruster April 29, 2020, 5:42 a.m. UTC | #8
BALATON Zoltan <balaton@eik.bme.hu> writes:

> On Fri, 24 Apr 2020, Markus Armbruster wrote:
>> BALATON Zoltan <balaton@eik.bme.hu> writes:
>>> On Tue, 21 Apr 2020, Markus Armbruster wrote:
>>>> BALATON Zoltan <balaton@eik.bme.hu> writes:
>>>>> On Mon, 20 Apr 2020, Markus Armbruster wrote:
>>>>>> The Error ** argument must be NULL, &error_abort, &error_fatal, or a
>>>>>> pointer to a variable containing NULL.  Passing an argument of the
>>>>>> latter kind twice without clearing it in between is wrong: if the
>>>>>> first call sets an error, it no longer points to NULL for the second
>>>>>> call.
>>>>>>
>>>>>> spd_data_generate() can pass @errp to error_setg() more than once when
>>>>>> it adjusts both memory size and type.  Harmless, because no caller
>>>>>> passes anything that needs adjusting.  Until the previous commit,
>>>>>> sam460ex passed types that needed adjusting, but not sizes.
>>>>>>
>>>>>> spd_data_generate()'s contract is rather awkward:
>>>>>>
>>>>>>    If everything's fine, return non-null and don't set an error.
>>>>>>
>>>>>>    Else, if memory size or type need adjusting, return non-null and
>>>>>>    set an error describing the adjustment.
>>>>>>
>>>>>>    Else, return null and set an error reporting why no data can be
>>>>>>    generated.
>>>>>>
>>>>>> Its callers treat the error as a warning even when null is returned.
>>>>>> They don't create the "smbus-eeprom" device then.  Suspicious.
>>>>>
>>>>> The idea here again is to make it work if there's a way it could work
>>>>> rather than throw back an error to user and bail. This is for user
>>>>> convenience in the likely case the user knows nothing about the board
>>>>> or SPD data restrictions.
>>>>
>>>> We're in perfect agreement that the user of QEMU should not need to know
>>>> anything about memory type and number of banks.  QEMU should pick a
>>>> sensible configuration for any memory size it supports.
>>>
>>> I though it could be useful to warn the users when QEMU had to fix up
>>> some values to notify them that what they get may not be what they
>>> expect and can then know why.
>>
>> *If* QEMU "fixed up" the user's configuration, then QEMU should indeed
>> warn the user.
>>
>> But it doesn't fix up anything here.  This broken code is unused.
>>
>>>                               If the message really annoys you you can
>>> remove it but I think it can be useful. I just think your real problem
>>> with it is that Error can't support it so it's easier to remove the
>>> warning than fixing Error or use warn_report instead.
>>
>> It's indeed easier to remove broken unused code than to try fixing it.
>> YAGNI.
>>
>>>>>                           You seem to disagree with this
>>>>
>>>> Here's what I actually disagree with:
>>>>
>>>> 1. QEMU warning the user about its choice of memory type, but only
>>>> sometimes.  Why warn?  There is nothing wrong, and there is nothing the
>>>> user can do to avoid the condition that triggers the warning.
>>>
>>> The memory size that triggers the warning is specified by the user so
>>> the user can do someting about it.
>>
>> There is no way to trigger the warning.  If we dropped PATCH 1 instead
>> of fixing it as I did in v2, then the only way to trigger the warning is
>> -M sam460ex -m 64 or -m 32, and the only way to avoid it is to don't do
>> that.
>>
>> Why would a user care whether he gets DDR or DDR2 memory?
>>
>>>> 2. QEMU changing the user's memory size.  Yes, I understand that's an
>>>> attempt to be helpful, but I prefer my tools not to second-guess my
>>>> intent.
>>>
>>> I agree with that and also hate Windows's habit of trying to be more
>>> intelligent than the user and prefer the Unix way however the average
>>> users of QEMU (from my perpective, who wants to run Amiga like OSes
>>> typically on Windows and for the most part not knowing what they are
>>> doing) are already intimidated by the messy QEMU command line
>>> interface and will specify random values and complain or go away if it
>>> does not work so making their life a little easier is not
>>> useless. These users (or any CLI users) are apparently not relevant
>>> from your point of view but they do exist and I think should be
>>> supported better.
>>
>> This theoretical.  Remember, we're talking about unused code.  Proof:
>>
>>    $ ppc-softmmu/qemu-system-ppc -M sam460ex -m 4096
>>    qemu-system-ppc: Max 1 banks of 2048 ,1024 ,512 ,256 ,128 ,64 ,32 MB DIMM/bank supported
>>    qemu-system-ppc: Possible valid RAM size: 2048
>>
>> I figure commit a0258e4afa "ppc/{ppc440_bamboo, sam460ex}: drop RAM size
>> fixup" removed the only uses.  If you disagree with it, take it up with
>> Igor, please.
>
> I did raise similar complaints at that patch series and proposed
> several alternatives to preserve the previous functionality (sam460ex
> wasn't the only board that fixed up memory size for users) but since
> current APIs don't support that and adding this extra feature for just
> this machine wasn't a priority, my comments were accepted and ignored
> and I did not feel it would be fair to hold back the whole series for
> this.
>
> I think I've explained how it worked and how it should work in my
> opinion and hope this could be revived at some point when the QEMU CLI
> will be made more user friendly (if ever) but I'm not willing to fight
> for that now. Silence in this case does not mean agreement but I see
> no point arguing if you cannot be convinced.

I acknowledge your opinion.

CLI usability for humans matters.  We're not doing well there.

A big part of the usability problem is complexity.

"Correcting" the user's configuration makes the CLI *more* complex.
Even worse when done deep down in the bowels of board code, because that
can only lead to inconsistency between machines.

The error message for a memory size we can't support could perhaps be
more helpful.

Good defaults are important.  The default memory size for sam460ex looks
okay to me.

If this commit changed the CLI, I'd gladly document your opinion /
opposition in the commit message.  But it doesn't.  It merely deletes
unreachable code because it's buggy.

> Just one point I'd like to keep is that no matter how you fix it the
> board code should be the only one that decides if the error is
> recoverable or not so don't abort or assert from this helper but
> return error to board code and exit from there so it has a chnace to
> recover in the furure or add "magic function" to help users wihtout
> having to touch this helper again. When you're touching the helper it
> would be nice to also try to convert the remaining two machines with
> hard coded SPD data so the final function is truely covering all
> current users of the function.

I'm not going to add unreachable code in the hope it may someday be
needed, sorry.

Refactoring the helper when we actually *have* a need is much more
likely to result in a helper that satisfies them.
diff mbox series

Patch

diff --git a/include/hw/i2c/smbus_eeprom.h b/include/hw/i2c/smbus_eeprom.h
index 15e2151b50..68b0063ab6 100644
--- a/include/hw/i2c/smbus_eeprom.h
+++ b/include/hw/i2c/smbus_eeprom.h
@@ -31,6 +31,6 @@  void smbus_eeprom_init(I2CBus *bus, int nb_eeprom,
                        const uint8_t *eeprom_spd, int size);
 
 enum sdram_type { SDR = 0x4, DDR = 0x7, DDR2 = 0x8 };
-uint8_t *spd_data_generate(enum sdram_type type, ram_addr_t size, Error **errp);
+uint8_t *spd_data_generate(enum sdram_type type, ram_addr_t size);
 
 #endif
diff --git a/hw/i2c/smbus_eeprom.c b/hw/i2c/smbus_eeprom.c
index 5adf3b15b5..07fbbf87f1 100644
--- a/hw/i2c/smbus_eeprom.c
+++ b/hw/i2c/smbus_eeprom.c
@@ -195,8 +195,7 @@  void smbus_eeprom_init(I2CBus *smbus, int nb_eeprom,
 }
 
 /* Generate SDRAM SPD EEPROM data describing a module of type and size */
-uint8_t *spd_data_generate(enum sdram_type type, ram_addr_t ram_size,
-                           Error **errp)
+uint8_t *spd_data_generate(enum sdram_type type, ram_addr_t ram_size)
 {
     uint8_t *spd;
     uint8_t nbanks;
@@ -222,29 +221,10 @@  uint8_t *spd_data_generate(enum sdram_type type, ram_addr_t ram_size,
         g_assert_not_reached();
     }
     size = ram_size >> 20; /* work in terms of megabytes */
-    if (size < 4) {
-        error_setg(errp, "SDRAM size is too small");
-        return NULL;
-    }
     sz_log2 = 31 - clz32(size);
     size = 1U << sz_log2;
-    if (ram_size > size * MiB) {
-        error_setg(errp, "SDRAM size 0x"RAM_ADDR_FMT" is not a power of 2, "
-                   "truncating to %u MB", ram_size, size);
-    }
-    if (sz_log2 < min_log2) {
-        error_setg(errp,
-                   "Memory size is too small for SDRAM type, adjusting type");
-        if (size >= 32) {
-            type = DDR;
-            min_log2 = 5;
-            max_log2 = 12;
-        } else {
-            type = SDR;
-            min_log2 = 2;
-            max_log2 = 9;
-        }
-    }
+    assert(ram_size == size * MiB);
+    assert(sz_log2 >= min_log2);
 
     nbanks = 1;
     while (sz_log2 > max_log2 && nbanks < 8) {
@@ -252,9 +232,7 @@  uint8_t *spd_data_generate(enum sdram_type type, ram_addr_t ram_size,
         nbanks++;
     }
 
-    if (size > (1ULL << sz_log2) * nbanks) {
-        error_setg(errp, "Memory size is too big for SDRAM, truncating");
-    }
+    assert(size == (1ULL << sz_log2) * nbanks);
 
     /* split to 2 banks if possible to avoid a bug in MIPS Malta firmware */
     if (nbanks == 1 && sz_log2 > min_log2) {
diff --git a/hw/mips/mips_fulong2e.c b/hw/mips/mips_fulong2e.c
index 5040afd581..ef02d54b33 100644
--- a/hw/mips/mips_fulong2e.c
+++ b/hw/mips/mips_fulong2e.c
@@ -297,7 +297,6 @@  static void mips_fulong2e_init(MachineState *machine)
     MemoryRegion *bios = g_new(MemoryRegion, 1);
     long bios_size;
     uint8_t *spd_data;
-    Error *err = NULL;
     int64_t kernel_entry;
     PCIBus *pci_bus;
     ISABus *isa_bus;
@@ -377,13 +376,8 @@  static void mips_fulong2e_init(MachineState *machine)
     }
 
     /* Populate SPD eeprom data */
-    spd_data = spd_data_generate(DDR, machine->ram_size, &err);
-    if (err) {
-        warn_report_err(err);
-    }
-    if (spd_data) {
-        smbus_eeprom_init_one(smbus, 0x50, spd_data);
-    }
+    spd_data = spd_data_generate(DDR, machine->ram_size);
+    smbus_eeprom_init_one(smbus, 0x50, spd_data);
 
     mc146818_rtc_init(isa_bus, 2000, NULL);
 
diff --git a/hw/ppc/sam460ex.c b/hw/ppc/sam460ex.c
index 856bc0b5a3..029fb6191a 100644
--- a/hw/ppc/sam460ex.c
+++ b/hw/ppc/sam460ex.c
@@ -292,7 +292,6 @@  static void sam460ex_init(MachineState *machine)
     SysBusDevice *sbdev;
     struct boot_info *boot_info;
     uint8_t *spd_data;
-    Error *err = NULL;
     int success;
 
     cpu = POWERPC_CPU(cpu_create(machine->cpu_type));
@@ -336,14 +335,9 @@  static void sam460ex_init(MachineState *machine)
     i2c = PPC4xx_I2C(dev)->bus;
     /* SPD EEPROM on RAM module */
     spd_data = spd_data_generate(ram_sizes[0] < 256 * MiB ? SDR : DDR2,
-                                 ram_sizes[0], &err);
-    if (err) {
-        warn_report_err(err);
-    }
-    if (spd_data) {
-        spd_data[20] = 4; /* SO-DIMM module */
-        smbus_eeprom_init_one(i2c, 0x50, spd_data);
-    }
+                                 ram_sizes[0]);
+    spd_data[20] = 4; /* SO-DIMM module */
+    smbus_eeprom_init_one(i2c, 0x50, spd_data);
     /* RTC */
     i2c_create_slave(i2c, "m41t80", 0x68);