diff mbox

[RFC,v2,4/6] hw/pci: introduce bridge-only vendor-specific capability to provide some hints to firmware

Message ID 1500761743-1669-5-git-send-email-zuban32s@gmail.com
State New
Headers show

Commit Message

Aleksandr Bezzubikov July 22, 2017, 10:15 p.m. UTC
On PCI init PCI bridges may need some
extra info about bus number to reserve, IO, memory and
prefetchable memory limits. QEMU can provide this
with special vendor-specific PCI capability.

Sizes of limits match ones from
PCI Type 1 Configuration Space Header,
number of buses to reserve occupies only 1 byte 
since it is the size of Subordinate Bus Number register.

Signed-off-by: Aleksandr Bezzubikov <zuban32s@gmail.com>
---
 hw/pci/pci_bridge.c         | 27 +++++++++++++++++++++++++++
 include/hw/pci/pci_bridge.h | 18 ++++++++++++++++++
 2 files changed, 45 insertions(+)

Comments

Marcel Apfelbaum July 23, 2017, 3:57 p.m. UTC | #1
On 23/07/2017 1:15, Aleksandr Bezzubikov wrote:
> On PCI init PCI bridges may need some
> extra info about bus number to reserve, IO, memory and
> prefetchable memory limits. QEMU can provide this
> with special vendor-specific PCI capability.
> 
> Sizes of limits match ones from
> PCI Type 1 Configuration Space Header,
> number of buses to reserve occupies only 1 byte
> since it is the size of Subordinate Bus Number register.
> 

Hi Alexandr,

> Signed-off-by: Aleksandr Bezzubikov <zuban32s@gmail.com>
> ---
>   hw/pci/pci_bridge.c         | 27 +++++++++++++++++++++++++++
>   include/hw/pci/pci_bridge.h | 18 ++++++++++++++++++
>   2 files changed, 45 insertions(+)
> 
> diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
> index 720119b..8ec6c2c 100644
> --- a/hw/pci/pci_bridge.c
> +++ b/hw/pci/pci_bridge.c
> @@ -408,6 +408,33 @@ void pci_bridge_map_irq(PCIBridge *br, const char* bus_name,
>       br->bus_name = bus_name;
>   }
>   
> +
> +int pci_bridge_help_cap_init(PCIDevice *dev, int cap_offset,

Can you please rename to something like
'pci_bridge_qemu_cap_init' to be more specific?

> +                              uint8_t bus_reserve, uint32_t io_limit,
> +                              uint16_t mem_limit, uint64_t pref_limit,


I am not sure regarding "limit" suffix, this
is a reservation, not a limitation.

> +                              Error **errp)
> +{
> +    size_t cap_len = sizeof(PCIBridgeQemuCap);
> +    PCIBridgeQemuCap cap;
> +
> +    cap.len = cap_len;
> +    cap.bus_res = bus_reserve;
> +    cap.io_lim = io_limit & 0xFF;
> +    cap.io_lim_upper = io_limit >> 8 & 0xFFFF;
> +    cap.mem_lim = mem_limit;
> +    cap.pref_lim = pref_limit & 0xFFFF;
> +    cap.pref_lim_upper = pref_limit >> 16 & 0xFFFFFFFF;
> +
> +    int offset = pci_add_capability(dev, PCI_CAP_ID_VNDR,
> +                                    cap_offset, cap_len, errp);
> +    if (offset < 0) {
> +        return offset;
> +    }
> +
> +    memcpy(dev->config + offset + 2, (char *)&cap + 2, cap_len - 2);
> +    return 0;
> +}
> +
>   static const TypeInfo pci_bridge_type_info = {
>       .name = TYPE_PCI_BRIDGE,
>       .parent = TYPE_PCI_DEVICE,
> diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h
> index ff7cbaa..c9f642c 100644
> --- a/include/hw/pci/pci_bridge.h
> +++ b/include/hw/pci/pci_bridge.h
> @@ -67,4 +67,22 @@ void pci_bridge_map_irq(PCIBridge *br, const char* bus_name,
>   #define  PCI_BRIDGE_CTL_DISCARD_STATUS	0x400	/* Discard timer status */
>   #define  PCI_BRIDGE_CTL_DISCARD_SERR	0x800	/* Discard timer SERR# enable */
>   
> +typedef struct PCIBridgeQemuCap {
> +    uint8_t id;     /* Standard PCI capability header field */
> +    uint8_t next;   /* Standard PCI capability header field */
> +    uint8_t len;    /* Standard PCI vendor-specific capability header field */
> +    uint8_t bus_res;
> +    uint32_t pref_lim_upper;
> +    uint16_t pref_lim;
> +    uint16_t mem_lim;

This 32bit IOMEM, right?

> +    uint16_t io_lim_upper;
> +    uint8_t io_lim;

Why do we need io_lim and io_lim_upper?

Thanks,
Marcel

> +    uint8_t padding;
> +} PCIBridgeQemuCap;
> +
> +int pci_bridge_help_cap_init(PCIDevice *dev, int cap_offset,
> +                              uint8_t bus_reserve, uint32_t io_limit,
> +                              uint16_t mem_limit, uint64_t pref_limit,
> +                              Error **errp);
> +
>   #endif /* QEMU_PCI_BRIDGE_H */
>
Aleksandr Bezzubikov July 23, 2017, 4:19 p.m. UTC | #2
2017-07-23 18:57 GMT+03:00 Marcel Apfelbaum <marcel@redhat.com>:

> On 23/07/2017 1:15, Aleksandr Bezzubikov wrote:
>
>> On PCI init PCI bridges may need some
>> extra info about bus number to reserve, IO, memory and
>> prefetchable memory limits. QEMU can provide this
>> with special vendor-specific PCI capability.
>>
>> Sizes of limits match ones from
>> PCI Type 1 Configuration Space Header,
>> number of buses to reserve occupies only 1 byte
>> since it is the size of Subordinate Bus Number register.
>>
>>
> Hi Alexandr,
>
> Signed-off-by: Aleksandr Bezzubikov <zuban32s@gmail.com>
>> ---
>>   hw/pci/pci_bridge.c         | 27 +++++++++++++++++++++++++++
>>   include/hw/pci/pci_bridge.h | 18 ++++++++++++++++++
>>   2 files changed, 45 insertions(+)
>>
>> diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
>> index 720119b..8ec6c2c 100644
>> --- a/hw/pci/pci_bridge.c
>> +++ b/hw/pci/pci_bridge.c
>> @@ -408,6 +408,33 @@ void pci_bridge_map_irq(PCIBridge *br, const char*
>> bus_name,
>>       br->bus_name = bus_name;
>>   }
>>   +
>> +int pci_bridge_help_cap_init(PCIDevice *dev, int cap_offset,
>>
>
> Can you please rename to something like
> 'pci_bridge_qemu_cap_init' to be more specific?
>
> +                              uint8_t bus_reserve, uint32_t io_limit,
>> +                              uint16_t mem_limit, uint64_t pref_limit,
>>
>
>
> I am not sure regarding "limit" suffix, this
> is a reservation, not a limitation.


I'd like this fields names to match actual registers which
are going to get the values.


>
>
> +                              Error **errp)
>> +{
>> +    size_t cap_len = sizeof(PCIBridgeQemuCap);
>> +    PCIBridgeQemuCap cap;
>> +
>> +    cap.len = cap_len;
>> +    cap.bus_res = bus_reserve;
>> +    cap.io_lim = io_limit & 0xFF;
>> +    cap.io_lim_upper = io_limit >> 8 & 0xFFFF;
>> +    cap.mem_lim = mem_limit;
>> +    cap.pref_lim = pref_limit & 0xFFFF;
>> +    cap.pref_lim_upper = pref_limit >> 16 & 0xFFFFFFFF;
>> +
>> +    int offset = pci_add_capability(dev, PCI_CAP_ID_VNDR,
>> +                                    cap_offset, cap_len, errp);
>> +    if (offset < 0) {
>> +        return offset;
>> +    }
>> +
>> +    memcpy(dev->config + offset + 2, (char *)&cap + 2, cap_len - 2);
>> +    return 0;
>> +}
>> +
>>   static const TypeInfo pci_bridge_type_info = {
>>       .name = TYPE_PCI_BRIDGE,
>>       .parent = TYPE_PCI_DEVICE,
>> diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h
>> index ff7cbaa..c9f642c 100644
>> --- a/include/hw/pci/pci_bridge.h
>> +++ b/include/hw/pci/pci_bridge.h
>> @@ -67,4 +67,22 @@ void pci_bridge_map_irq(PCIBridge *br, const char*
>> bus_name,
>>   #define  PCI_BRIDGE_CTL_DISCARD_STATUS        0x400   /* Discard timer
>> status */
>>   #define  PCI_BRIDGE_CTL_DISCARD_SERR  0x800   /* Discard timer SERR#
>> enable */
>>   +typedef struct PCIBridgeQemuCap {
>> +    uint8_t id;     /* Standard PCI capability header field */
>> +    uint8_t next;   /* Standard PCI capability header field */
>> +    uint8_t len;    /* Standard PCI vendor-specific capability header
>> field */
>> +    uint8_t bus_res;
>> +    uint32_t pref_lim_upper;
>> +    uint16_t pref_lim;
>> +    uint16_t mem_lim;
>>
>
> This 32bit IOMEM, right?
>
> +    uint16_t io_lim_upper;
>> +    uint8_t io_lim;
>>
>
> Why do we need io_lim and io_lim_upper?
>

The idea was to be able to directly move the capability fields values
to the registers when actually using it (in firmware) code.
Secondly, it saves a little space by avoding usage 32-bit types
when 24-bit ones are desired. And the same thing with prefetchable (48->64).
But if it's more convenient no to split this value, I can do that.



>
> Thanks,
> Marcel
>
>
> +    uint8_t padding;
>> +} PCIBridgeQemuCap;
>> +
>> +int pci_bridge_help_cap_init(PCIDevice *dev, int cap_offset,
>> +                              uint8_t bus_reserve, uint32_t io_limit,
>> +                              uint16_t mem_limit, uint64_t pref_limit,
>> +                              Error **errp);
>> +
>>   #endif /* QEMU_PCI_BRIDGE_H */
>>
>>
>
Marcel Apfelbaum July 23, 2017, 4:24 p.m. UTC | #3
On 23/07/2017 19:19, Alexander Bezzubikov wrote:
> 2017-07-23 18:57 GMT+03:00 Marcel Apfelbaum <marcel@redhat.com 
> <mailto:marcel@redhat.com>>:
> 
>     On 23/07/2017 1:15, Aleksandr Bezzubikov wrote:
> 
>         On PCI init PCI bridges may need some
>         extra info about bus number to reserve, IO, memory and
>         prefetchable memory limits. QEMU can provide this
>         with special vendor-specific PCI capability.
> 
>         Sizes of limits match ones from
>         PCI Type 1 Configuration Space Header,
>         number of buses to reserve occupies only 1 byte
>         since it is the size of Subordinate Bus Number register.
> 
> 
>     Hi Alexandr,
> 
>         Signed-off-by: Aleksandr Bezzubikov <zuban32s@gmail.com
>         <mailto:zuban32s@gmail.com>>
>         ---
>            hw/pci/pci_bridge.c         | 27 +++++++++++++++++++++++++++
>            include/hw/pci/pci_bridge.h | 18 ++++++++++++++++++
>            2 files changed, 45 insertions(+)
> 
>         diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
>         index 720119b..8ec6c2c 100644
>         --- a/hw/pci/pci_bridge.c
>         +++ b/hw/pci/pci_bridge.c
>         @@ -408,6 +408,33 @@ void pci_bridge_map_irq(PCIBridge *br,
>         const char* bus_name,
>                br->bus_name = bus_name;
>            }
>            +
>         +int pci_bridge_help_cap_init(PCIDevice *dev, int cap_offset,
> 
> 
>     Can you please rename to something like
>     'pci_bridge_qemu_cap_init' to be more specific?
> 
>         +                              uint8_t bus_reserve, uint32_t
>         io_limit,
>         +                              uint16_t mem_limit, uint64_t
>         pref_limit,
> 
> 
> 
>     I am not sure regarding "limit" suffix, this
>     is a reservation, not a limitation.
> 
> 
> I'd like this fields names to match actual registers which
> are going to get the values.
> 

For this case I think is better to have io_res..., to describe
the parameter rather than match the registers.

> 
> 
>         +                              Error **errp)
>         +{
>         +    size_t cap_len = sizeof(PCIBridgeQemuCap);
>         +    PCIBridgeQemuCap cap;
>         +
>         +    cap.len = cap_len;
>         +    cap.bus_res = bus_reserve;
>         +    cap.io_lim = io_limit & 0xFF;
>         +    cap.io_lim_upper = io_limit >> 8 & 0xFFFF;
>         +    cap.mem_lim = mem_limit;
>         +    cap.pref_lim = pref_limit & 0xFFFF;
>         +    cap.pref_lim_upper = pref_limit >> 16 & 0xFFFFFFFF;
>         +
>         +    int offset = pci_add_capability(dev, PCI_CAP_ID_VNDR,
>         +                                    cap_offset, cap_len, errp);
>         +    if (offset < 0) {
>         +        return offset;
>         +    }
>         +
>         +    memcpy(dev->config + offset + 2, (char *)&cap + 2, cap_len
>         - 2);
>         +    return 0;
>         +}
>         +
>            static const TypeInfo pci_bridge_type_info = {
>                .name = TYPE_PCI_BRIDGE,
>                .parent = TYPE_PCI_DEVICE,
>         diff --git a/include/hw/pci/pci_bridge.h
>         b/include/hw/pci/pci_bridge.h
>         index ff7cbaa..c9f642c 100644
>         --- a/include/hw/pci/pci_bridge.h
>         +++ b/include/hw/pci/pci_bridge.h
>         @@ -67,4 +67,22 @@ void pci_bridge_map_irq(PCIBridge *br, const
>         char* bus_name,
>            #define  PCI_BRIDGE_CTL_DISCARD_STATUS        0x400   /*
>         Discard timer status */
>            #define  PCI_BRIDGE_CTL_DISCARD_SERR  0x800   /* Discard
>         timer SERR# enable */
>            +typedef struct PCIBridgeQemuCap {
>         +    uint8_t id;     /* Standard PCI capability header field */
>         +    uint8_t next;   /* Standard PCI capability header field */
>         +    uint8_t len;    /* Standard PCI vendor-specific capability
>         header field */
>         +    uint8_t bus_res;
>         +    uint32_t pref_lim_upper;
>         +    uint16_t pref_lim;
>         +    uint16_t mem_lim;
> 
> 
>     This 32bit IOMEM, right?
> 
>         +    uint16_t io_lim_upper;
>         +    uint8_t io_lim;
> 
> 
>     Why do we need io_lim and io_lim_upper?
> 
> 
> The idea was to be able to directly move the capability fields values
> to the registers when actually using it (in firmware) code.
> Secondly, it saves a little space by avoding usage 32-bit types
> when 24-bit ones are desired. And the same thing with prefetchable (48->64).
> But if it's more convenient no to split this value, I can do that.

With a clear explanation (Mimic of the <PCI config space field>)
I personally don't mind keeping it like that.

Thanks,
Marcel

> 
> 
>     Thanks,
>     Marcel
> 
> 
>         +    uint8_t padding;
>         +} PCIBridgeQemuCap;
>         +
>         +int pci_bridge_help_cap_init(PCIDevice *dev, int cap_offset,
>         +                              uint8_t bus_reserve, uint32_t
>         io_limit,
>         +                              uint16_t mem_limit, uint64_t
>         pref_limit,
>         +                              Error **errp);
>         +
>            #endif /* QEMU_PCI_BRIDGE_H */
> 
> 
> 
> 
> 
> -- 
> Alexander Bezzubikov
Michael S. Tsirkin July 26, 2017, 7:43 p.m. UTC | #4
On Sun, Jul 23, 2017 at 01:15:41AM +0300, Aleksandr Bezzubikov wrote:
> On PCI init PCI bridges may need some
> extra info about bus number to reserve, IO, memory and
> prefetchable memory limits. QEMU can provide this
> with special

with a special

> vendor-specific PCI capability.
> 
> Sizes of limits match ones from
> PCI Type 1 Configuration Space Header,
> number of buses to reserve occupies only 1 byte 
> since it is the size of Subordinate Bus Number register.
> 
> Signed-off-by: Aleksandr Bezzubikov <zuban32s@gmail.com>
> ---
>  hw/pci/pci_bridge.c         | 27 +++++++++++++++++++++++++++
>  include/hw/pci/pci_bridge.h | 18 ++++++++++++++++++
>  2 files changed, 45 insertions(+)
> 
> diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
> index 720119b..8ec6c2c 100644
> --- a/hw/pci/pci_bridge.c
> +++ b/hw/pci/pci_bridge.c
> @@ -408,6 +408,33 @@ void pci_bridge_map_irq(PCIBridge *br, const char* bus_name,
>      br->bus_name = bus_name;
>  }
>  
> +
> +int pci_bridge_help_cap_init(PCIDevice *dev, int cap_offset,

help? should be qemu_cap_init?

> +                              uint8_t bus_reserve, uint32_t io_limit,
> +                              uint16_t mem_limit, uint64_t pref_limit,
> +                              Error **errp)
> +{
> +    size_t cap_len = sizeof(PCIBridgeQemuCap);
> +    PCIBridgeQemuCap cap;

This leaks info to guest. You want to init all fields here:

cap = {
 .len = ....
};

> +
> +    cap.len = cap_len;
> +    cap.bus_res = bus_reserve;
> +    cap.io_lim = io_limit & 0xFF;
> +    cap.io_lim_upper = io_limit >> 8 & 0xFFFF;
> +    cap.mem_lim = mem_limit;
> +    cap.pref_lim = pref_limit & 0xFFFF;
> +    cap.pref_lim_upper = pref_limit >> 16 & 0xFFFFFFFF;

Please use pci_set_word etc or cpu_to_leXX.

I think it's easiest to replace struct with a set of macros then
pci_set_word does the work for you.


> +
> +    int offset = pci_add_capability(dev, PCI_CAP_ID_VNDR,
> +                                    cap_offset, cap_len, errp);
> +    if (offset < 0) {
> +        return offset;
> +    }
> +
> +    memcpy(dev->config + offset + 2, (char *)&cap + 2, cap_len - 2);

+2 is yacky. See how virtio does it:

    memcpy(dev->config + offset + PCI_CAP_FLAGS, &cap->cap_len,
           cap->cap_len - PCI_CAP_FLAGS);


> +    return 0;
> +}
> +
>  static const TypeInfo pci_bridge_type_info = {
>      .name = TYPE_PCI_BRIDGE,
>      .parent = TYPE_PCI_DEVICE,
> diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h
> index ff7cbaa..c9f642c 100644
> --- a/include/hw/pci/pci_bridge.h
> +++ b/include/hw/pci/pci_bridge.h
> @@ -67,4 +67,22 @@ void pci_bridge_map_irq(PCIBridge *br, const char* bus_name,
>  #define  PCI_BRIDGE_CTL_DISCARD_STATUS	0x400	/* Discard timer status */
>  #define  PCI_BRIDGE_CTL_DISCARD_SERR	0x800	/* Discard timer SERR# enable */
>  
> +typedef struct PCIBridgeQemuCap {
> +    uint8_t id;     /* Standard PCI capability header field */
> +    uint8_t next;   /* Standard PCI capability header field */
> +    uint8_t len;    /* Standard PCI vendor-specific capability header field */
> +    uint8_t bus_res;
> +    uint32_t pref_lim_upper;

Big endian? Ugh.

> +    uint16_t pref_lim;
> +    uint16_t mem_lim;

I'd say we need 64 bit for memory.

> +    uint16_t io_lim_upper;
> +    uint8_t io_lim;
> +    uint8_t padding;

IMHO each type should have a special "don't care" flag
that would mean "I don't know".


> +} PCIBridgeQemuCap;

You don't really need this struct in the header. And pls document all fields.

> +
> +int pci_bridge_help_cap_init(PCIDevice *dev, int cap_offset,
> +                              uint8_t bus_reserve, uint32_t io_limit,
> +                              uint16_t mem_limit, uint64_t pref_limit,
> +                              Error **errp);
> +
>  #endif /* QEMU_PCI_BRIDGE_H */
> -- 
> 2.7.4
Aleksandr Bezzubikov July 26, 2017, 9:54 p.m. UTC | #5
2017-07-26 22:43 GMT+03:00 Michael S. Tsirkin <mst@redhat.com>:
> On Sun, Jul 23, 2017 at 01:15:41AM +0300, Aleksandr Bezzubikov wrote:
>> On PCI init PCI bridges may need some
>> extra info about bus number to reserve, IO, memory and
>> prefetchable memory limits. QEMU can provide this
>> with special
>
> with a special
>
>> vendor-specific PCI capability.
>>
>> Sizes of limits match ones from
>> PCI Type 1 Configuration Space Header,
>> number of buses to reserve occupies only 1 byte
>> since it is the size of Subordinate Bus Number register.
>>
>> Signed-off-by: Aleksandr Bezzubikov <zuban32s@gmail.com>
>> ---
>>  hw/pci/pci_bridge.c         | 27 +++++++++++++++++++++++++++
>>  include/hw/pci/pci_bridge.h | 18 ++++++++++++++++++
>>  2 files changed, 45 insertions(+)
>>
>> diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
>> index 720119b..8ec6c2c 100644
>> --- a/hw/pci/pci_bridge.c
>> +++ b/hw/pci/pci_bridge.c
>> @@ -408,6 +408,33 @@ void pci_bridge_map_irq(PCIBridge *br, const char* bus_name,
>>      br->bus_name = bus_name;
>>  }
>>
>> +
>> +int pci_bridge_help_cap_init(PCIDevice *dev, int cap_offset,
>
> help? should be qemu_cap_init?
>
>> +                              uint8_t bus_reserve, uint32_t io_limit,
>> +                              uint16_t mem_limit, uint64_t pref_limit,
>> +                              Error **errp)
>> +{
>> +    size_t cap_len = sizeof(PCIBridgeQemuCap);
>> +    PCIBridgeQemuCap cap;
>
> This leaks info to guest. You want to init all fields here:
>
> cap = {
>  .len = ....
> };

I surely can do this for len field, but as Laszlo proposed
we can use mutually exclusive fields,
e.g. pref_32 and pref_64, the only way I have left
is to use ternary operator (if we surely need this
big initializer). Keeping some if's would look better,
I think.

>
>> +
>> +    cap.len = cap_len;
>> +    cap.bus_res = bus_reserve;
>> +    cap.io_lim = io_limit & 0xFF;
>> +    cap.io_lim_upper = io_limit >> 8 & 0xFFFF;
>> +    cap.mem_lim = mem_limit;
>> +    cap.pref_lim = pref_limit & 0xFFFF;
>> +    cap.pref_lim_upper = pref_limit >> 16 & 0xFFFFFFFF;
>
> Please use pci_set_word etc or cpu_to_leXX.
>

Since now we've decided to avoid fields separation into <field> + <field_upper>,
this bitmask along with pci_set_word are no longer needed.

> I think it's easiest to replace struct with a set of macros then
> pci_set_word does the work for you.
>

I don't really want to use macros here because structure
show us the whole capability layout and this can
decrease documenting efforts. More than that,
memcpy usage is very convenient here, and I wouldn't like
to lose it.

>
>> +
>> +    int offset = pci_add_capability(dev, PCI_CAP_ID_VNDR,
>> +                                    cap_offset, cap_len, errp);
>> +    if (offset < 0) {
>> +        return offset;
>> +    }
>> +
>> +    memcpy(dev->config + offset + 2, (char *)&cap + 2, cap_len - 2);
>
> +2 is yacky. See how virtio does it:
>
>     memcpy(dev->config + offset + PCI_CAP_FLAGS, &cap->cap_len,
>            cap->cap_len - PCI_CAP_FLAGS);
>
>

OK.

>> +    return 0;
>> +}
>> +
>>  static const TypeInfo pci_bridge_type_info = {
>>      .name = TYPE_PCI_BRIDGE,
>>      .parent = TYPE_PCI_DEVICE,
>> diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h
>> index ff7cbaa..c9f642c 100644
>> --- a/include/hw/pci/pci_bridge.h
>> +++ b/include/hw/pci/pci_bridge.h
>> @@ -67,4 +67,22 @@ void pci_bridge_map_irq(PCIBridge *br, const char* bus_name,
>>  #define  PCI_BRIDGE_CTL_DISCARD_STATUS       0x400   /* Discard timer status */
>>  #define  PCI_BRIDGE_CTL_DISCARD_SERR 0x800   /* Discard timer SERR# enable */
>>
>> +typedef struct PCIBridgeQemuCap {
>> +    uint8_t id;     /* Standard PCI capability header field */
>> +    uint8_t next;   /* Standard PCI capability header field */
>> +    uint8_t len;    /* Standard PCI vendor-specific capability header field */
>> +    uint8_t bus_res;
>> +    uint32_t pref_lim_upper;
>
> Big endian? Ugh.
>

Agreed, and this's gonna to disappear with
the new layout.

>> +    uint16_t pref_lim;
>> +    uint16_t mem_lim;
>
> I'd say we need 64 bit for memory.
>

Why? Non-prefetchable MEMORY_LIMIT register is 16 bits long.

>> +    uint16_t io_lim_upper;
>> +    uint8_t io_lim;
>> +    uint8_t padding;
>
> IMHO each type should have a special "don't care" flag
> that would mean "I don't know".
>
>

Don't know what? Now 0 is an indicator to do nothing with this field.

>> +} PCIBridgeQemuCap;
>
> You don't really need this struct in the header. And pls document all fields.
>
>> +
>> +int pci_bridge_help_cap_init(PCIDevice *dev, int cap_offset,
>> +                              uint8_t bus_reserve, uint32_t io_limit,
>> +                              uint16_t mem_limit, uint64_t pref_limit,
>> +                              Error **errp);
>> +
>>  #endif /* QEMU_PCI_BRIDGE_H */
>> --
>> 2.7.4



--
Alexander Bezzubikov
Laszlo Ersek July 26, 2017, 11:11 p.m. UTC | #6
On 07/26/17 23:54, Alexander Bezzubikov wrote:
> 2017-07-26 22:43 GMT+03:00 Michael S. Tsirkin <mst@redhat.com>:
>> On Sun, Jul 23, 2017 at 01:15:41AM +0300, Aleksandr Bezzubikov wrote:

>>> +    PCIBridgeQemuCap cap;
>>
>> This leaks info to guest. You want to init all fields here:
>>
>> cap = {
>>  .len = ....
>> };
> 
> I surely can do this for len field, but as Laszlo proposed
> we can use mutually exclusive fields,
> e.g. pref_32 and pref_64, the only way I have left
> is to use ternary operator (if we surely need this
> big initializer). Keeping some if's would look better,
> I think.

I think it's fine to use "if"s in order to set up the structure
partially / gradually, but then please clear the structure up-front:


  PCIBridgeQemuCap cap = { 0 };

(In general "{ 0 }" is the best initializer ever, because it can
zero-init a variable of *any* type at all. Gcc might complain about the
inexact depth of {} nesting of course, but it's nonetheless valid C.)

Or else add a memset-to-zero.

Or else, do just

  PCIBridgeQemuCap cap = { .len = ... };

which will zero-fill every other field. ("[...] all subobjects that are
not initialized explicitly shall be initialized implicitly the same as
objects that have static storage duration").

Thanks
Laszlo
Michael S. Tsirkin July 26, 2017, 11:28 p.m. UTC | #7
On Thu, Jul 27, 2017 at 12:54:07AM +0300, Alexander Bezzubikov wrote:
> 2017-07-26 22:43 GMT+03:00 Michael S. Tsirkin <mst@redhat.com>:
> > On Sun, Jul 23, 2017 at 01:15:41AM +0300, Aleksandr Bezzubikov wrote:
> >> On PCI init PCI bridges may need some
> >> extra info about bus number to reserve, IO, memory and
> >> prefetchable memory limits. QEMU can provide this
> >> with special
> >
> > with a special
> >
> >> vendor-specific PCI capability.
> >>
> >> Sizes of limits match ones from
> >> PCI Type 1 Configuration Space Header,
> >> number of buses to reserve occupies only 1 byte
> >> since it is the size of Subordinate Bus Number register.
> >>
> >> Signed-off-by: Aleksandr Bezzubikov <zuban32s@gmail.com>
> >> ---
> >>  hw/pci/pci_bridge.c         | 27 +++++++++++++++++++++++++++
> >>  include/hw/pci/pci_bridge.h | 18 ++++++++++++++++++
> >>  2 files changed, 45 insertions(+)
> >>
> >> diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
> >> index 720119b..8ec6c2c 100644
> >> --- a/hw/pci/pci_bridge.c
> >> +++ b/hw/pci/pci_bridge.c
> >> @@ -408,6 +408,33 @@ void pci_bridge_map_irq(PCIBridge *br, const char* bus_name,
> >>      br->bus_name = bus_name;
> >>  }
> >>
> >> +
> >> +int pci_bridge_help_cap_init(PCIDevice *dev, int cap_offset,
> >
> > help? should be qemu_cap_init?
> >
> >> +                              uint8_t bus_reserve, uint32_t io_limit,
> >> +                              uint16_t mem_limit, uint64_t pref_limit,
> >> +                              Error **errp)
> >> +{
> >> +    size_t cap_len = sizeof(PCIBridgeQemuCap);
> >> +    PCIBridgeQemuCap cap;
> >
> > This leaks info to guest. You want to init all fields here:
> >
> > cap = {
> >  .len = ....
> > };
> 
> I surely can do this for len field, but as Laszlo proposed
> we can use mutually exclusive fields,
> e.g. pref_32 and pref_64, the only way I have left
> is to use ternary operator (if we surely need this
> big initializer). Keeping some if's would look better,
> I think.
> 
> >
> >> +
> >> +    cap.len = cap_len;
> >> +    cap.bus_res = bus_reserve;
> >> +    cap.io_lim = io_limit & 0xFF;
> >> +    cap.io_lim_upper = io_limit >> 8 & 0xFFFF;
> >> +    cap.mem_lim = mem_limit;
> >> +    cap.pref_lim = pref_limit & 0xFFFF;
> >> +    cap.pref_lim_upper = pref_limit >> 16 & 0xFFFFFFFF;
> >
> > Please use pci_set_word etc or cpu_to_leXX.
> >
> 
> Since now we've decided to avoid fields separation into <field> + <field_upper>,
> this bitmask along with pci_set_word are no longer needed.
> 
> > I think it's easiest to replace struct with a set of macros then
> > pci_set_word does the work for you.
> >
> 
> I don't really want to use macros here because structure
> show us the whole capability layout and this can
> decrease documenting efforts. More than that,
> memcpy usage is very convenient here, and I wouldn't like
> to lose it.
> 
> >
> >> +
> >> +    int offset = pci_add_capability(dev, PCI_CAP_ID_VNDR,
> >> +                                    cap_offset, cap_len, errp);
> >> +    if (offset < 0) {
> >> +        return offset;
> >> +    }
> >> +
> >> +    memcpy(dev->config + offset + 2, (char *)&cap + 2, cap_len - 2);
> >
> > +2 is yacky. See how virtio does it:
> >
> >     memcpy(dev->config + offset + PCI_CAP_FLAGS, &cap->cap_len,
> >            cap->cap_len - PCI_CAP_FLAGS);
> >
> >
> 
> OK.
> 
> >> +    return 0;
> >> +}
> >> +
> >>  static const TypeInfo pci_bridge_type_info = {
> >>      .name = TYPE_PCI_BRIDGE,
> >>      .parent = TYPE_PCI_DEVICE,
> >> diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h
> >> index ff7cbaa..c9f642c 100644
> >> --- a/include/hw/pci/pci_bridge.h
> >> +++ b/include/hw/pci/pci_bridge.h
> >> @@ -67,4 +67,22 @@ void pci_bridge_map_irq(PCIBridge *br, const char* bus_name,
> >>  #define  PCI_BRIDGE_CTL_DISCARD_STATUS       0x400   /* Discard timer status */
> >>  #define  PCI_BRIDGE_CTL_DISCARD_SERR 0x800   /* Discard timer SERR# enable */
> >>
> >> +typedef struct PCIBridgeQemuCap {
> >> +    uint8_t id;     /* Standard PCI capability header field */
> >> +    uint8_t next;   /* Standard PCI capability header field */
> >> +    uint8_t len;    /* Standard PCI vendor-specific capability header field */
> >> +    uint8_t bus_res;
> >> +    uint32_t pref_lim_upper;
> >
> > Big endian? Ugh.
> >
> 
> Agreed, and this's gonna to disappear with
> the new layout.
> 
> >> +    uint16_t pref_lim;
> >> +    uint16_t mem_lim;
> >
> > I'd say we need 64 bit for memory.
> >
> 
> Why? Non-prefetchable MEMORY_LIMIT register is 16 bits long.

Hmm ok, but e.g. for io there are bridges that have extra registers
to specify non-standard non-aligned registers.

> >> +    uint16_t io_lim_upper;
> >> +    uint8_t io_lim;
> >> +    uint8_t padding;
> >
> > IMHO each type should have a special "don't care" flag
> > that would mean "I don't know".
> >
> >
> 
> Don't know what? Now 0 is an indicator to do nothing with this field.

In that case how do you say "don't allocate any memory"?


> >> +} PCIBridgeQemuCap;
> >
> > You don't really need this struct in the header. And pls document all fields.
> >
> >> +
> >> +int pci_bridge_help_cap_init(PCIDevice *dev, int cap_offset,
> >> +                              uint8_t bus_reserve, uint32_t io_limit,
> >> +                              uint16_t mem_limit, uint64_t pref_limit,
> >> +                              Error **errp);
> >> +
> >>  #endif /* QEMU_PCI_BRIDGE_H */
> >> --
> >> 2.7.4
> 
> 
> 
> --
> Alexander Bezzubikov
Marcel Apfelbaum July 27, 2017, 9:39 a.m. UTC | #8
On 27/07/2017 2:28, Michael S. Tsirkin wrote:
> On Thu, Jul 27, 2017 at 12:54:07AM +0300, Alexander Bezzubikov wrote:
>> 2017-07-26 22:43 GMT+03:00 Michael S. Tsirkin <mst@redhat.com>:
>>> On Sun, Jul 23, 2017 at 01:15:41AM +0300, Aleksandr Bezzubikov wrote:
>>>> On PCI init PCI bridges may need some
>>>> extra info about bus number to reserve, IO, memory and
>>>> prefetchable memory limits. QEMU can provide this
>>>> with special
>>>
>>> with a special
>>>
>>>> vendor-specific PCI capability.
>>>>
>>>> Sizes of limits match ones from
>>>> PCI Type 1 Configuration Space Header,
>>>> number of buses to reserve occupies only 1 byte
>>>> since it is the size of Subordinate Bus Number register.
>>>>
>>>> Signed-off-by: Aleksandr Bezzubikov <zuban32s@gmail.com>
>>>> ---
>>>>   hw/pci/pci_bridge.c         | 27 +++++++++++++++++++++++++++
>>>>   include/hw/pci/pci_bridge.h | 18 ++++++++++++++++++
>>>>   2 files changed, 45 insertions(+)
>>>>
>>>> diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
>>>> index 720119b..8ec6c2c 100644
>>>> --- a/hw/pci/pci_bridge.c
>>>> +++ b/hw/pci/pci_bridge.c
>>>> @@ -408,6 +408,33 @@ void pci_bridge_map_irq(PCIBridge *br, const char* bus_name,
>>>>       br->bus_name = bus_name;
>>>>   }
>>>>
>>>> +
>>>> +int pci_bridge_help_cap_init(PCIDevice *dev, int cap_offset,
>>>
>>> help? should be qemu_cap_init?
>>>
>>>> +                              uint8_t bus_reserve, uint32_t io_limit,
>>>> +                              uint16_t mem_limit, uint64_t pref_limit,
>>>> +                              Error **errp)
>>>> +{
>>>> +    size_t cap_len = sizeof(PCIBridgeQemuCap);
>>>> +    PCIBridgeQemuCap cap;
>>>
>>> This leaks info to guest. You want to init all fields here:
>>>
>>> cap = {
>>>   .len = ....
>>> };
>>
>> I surely can do this for len field, but as Laszlo proposed
>> we can use mutually exclusive fields,
>> e.g. pref_32 and pref_64, the only way I have left
>> is to use ternary operator (if we surely need this
>> big initializer). Keeping some if's would look better,
>> I think.
>>
>>>
>>>> +
>>>> +    cap.len = cap_len;
>>>> +    cap.bus_res = bus_reserve;
>>>> +    cap.io_lim = io_limit & 0xFF;
>>>> +    cap.io_lim_upper = io_limit >> 8 & 0xFFFF;
>>>> +    cap.mem_lim = mem_limit;
>>>> +    cap.pref_lim = pref_limit & 0xFFFF;
>>>> +    cap.pref_lim_upper = pref_limit >> 16 & 0xFFFFFFFF;
>>>
>>> Please use pci_set_word etc or cpu_to_leXX.
>>>
>>
>> Since now we've decided to avoid fields separation into <field> + <field_upper>,
>> this bitmask along with pci_set_word are no longer needed.
>>
>>> I think it's easiest to replace struct with a set of macros then
>>> pci_set_word does the work for you.
>>>
>>
>> I don't really want to use macros here because structure
>> show us the whole capability layout and this can
>> decrease documenting efforts. More than that,
>> memcpy usage is very convenient here, and I wouldn't like
>> to lose it.
>>
>>>
>>>> +
>>>> +    int offset = pci_add_capability(dev, PCI_CAP_ID_VNDR,
>>>> +                                    cap_offset, cap_len, errp);
>>>> +    if (offset < 0) {
>>>> +        return offset;
>>>> +    }
>>>> +
>>>> +    memcpy(dev->config + offset + 2, (char *)&cap + 2, cap_len - 2);
>>>
>>> +2 is yacky. See how virtio does it:
>>>
>>>      memcpy(dev->config + offset + PCI_CAP_FLAGS, &cap->cap_len,
>>>             cap->cap_len - PCI_CAP_FLAGS);
>>>
>>>
>>
>> OK.
>>
>>>> +    return 0;
>>>> +}
>>>> +
>>>>   static const TypeInfo pci_bridge_type_info = {
>>>>       .name = TYPE_PCI_BRIDGE,
>>>>       .parent = TYPE_PCI_DEVICE,
>>>> diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h
>>>> index ff7cbaa..c9f642c 100644
>>>> --- a/include/hw/pci/pci_bridge.h
>>>> +++ b/include/hw/pci/pci_bridge.h
>>>> @@ -67,4 +67,22 @@ void pci_bridge_map_irq(PCIBridge *br, const char* bus_name,
>>>>   #define  PCI_BRIDGE_CTL_DISCARD_STATUS       0x400   /* Discard timer status */
>>>>   #define  PCI_BRIDGE_CTL_DISCARD_SERR 0x800   /* Discard timer SERR# enable */
>>>>
>>>> +typedef struct PCIBridgeQemuCap {
>>>> +    uint8_t id;     /* Standard PCI capability header field */
>>>> +    uint8_t next;   /* Standard PCI capability header field */
>>>> +    uint8_t len;    /* Standard PCI vendor-specific capability header field */
>>>> +    uint8_t bus_res;
>>>> +    uint32_t pref_lim_upper;
>>>
>>> Big endian? Ugh.
>>>
>>
>> Agreed, and this's gonna to disappear with
>> the new layout.
>>
>>>> +    uint16_t pref_lim;
>>>> +    uint16_t mem_lim;
>>>
>>> I'd say we need 64 bit for memory.
>>>
>>
>> Why? Non-prefetchable MEMORY_LIMIT register is 16 bits long.
> 
> Hmm ok, but e.g. for io there are bridges that have extra registers
> to specify non-standard non-aligned registers.
> 
>>>> +    uint16_t io_lim_upper;
>>>> +    uint8_t io_lim;
>>>> +    uint8_t padding;
>>>
>>> IMHO each type should have a special "don't care" flag
>>> that would mean "I don't know".
>>>
>>>
>>
>> Don't know what? Now 0 is an indicator to do nothing with this field.
> 
> In that case how do you say "don't allocate any memory"?
> 

We can keep the MEM/Limit registers read-only for such cases,
as they are optional registers.

Thanks,
Marcel

> 
>>>> +} PCIBridgeQemuCap;
>>>
>>> You don't really need this struct in the header. And pls document all fields.
>>>
>>>> +
>>>> +int pci_bridge_help_cap_init(PCIDevice *dev, int cap_offset,
>>>> +                              uint8_t bus_reserve, uint32_t io_limit,
>>>> +                              uint16_t mem_limit, uint64_t pref_limit,
>>>> +                              Error **errp);
>>>> +
>>>>   #endif /* QEMU_PCI_BRIDGE_H */
>>>> --
>>>> 2.7.4
>>
>>
>>
>> --
>> Alexander Bezzubikov
Laszlo Ersek July 27, 2017, 1:58 p.m. UTC | #9
On 07/27/17 11:39, Marcel Apfelbaum wrote:
> On 27/07/2017 2:28, Michael S. Tsirkin wrote:
>> On Thu, Jul 27, 2017 at 12:54:07AM +0300, Alexander Bezzubikov wrote:
>>> 2017-07-26 22:43 GMT+03:00 Michael S. Tsirkin <mst@redhat.com>:
>>>> On Sun, Jul 23, 2017 at 01:15:41AM +0300, Aleksandr Bezzubikov wrote:
>>>>> On PCI init PCI bridges may need some
>>>>> extra info about bus number to reserve, IO, memory and
>>>>> prefetchable memory limits. QEMU can provide this
>>>>> with special
>>>>
>>>> with a special
>>>>
>>>>> vendor-specific PCI capability.
>>>>>
>>>>> Sizes of limits match ones from
>>>>> PCI Type 1 Configuration Space Header,
>>>>> number of buses to reserve occupies only 1 byte
>>>>> since it is the size of Subordinate Bus Number register.
>>>>>
>>>>> Signed-off-by: Aleksandr Bezzubikov <zuban32s@gmail.com>
>>>>> ---
>>>>>   hw/pci/pci_bridge.c         | 27 +++++++++++++++++++++++++++
>>>>>   include/hw/pci/pci_bridge.h | 18 ++++++++++++++++++
>>>>>   2 files changed, 45 insertions(+)
>>>>>
>>>>> diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
>>>>> index 720119b..8ec6c2c 100644
>>>>> --- a/hw/pci/pci_bridge.c
>>>>> +++ b/hw/pci/pci_bridge.c
>>>>> @@ -408,6 +408,33 @@ void pci_bridge_map_irq(PCIBridge *br, const
>>>>> char* bus_name,
>>>>>       br->bus_name = bus_name;
>>>>>   }
>>>>>
>>>>> +
>>>>> +int pci_bridge_help_cap_init(PCIDevice *dev, int cap_offset,
>>>>
>>>> help? should be qemu_cap_init?
>>>>
>>>>> +                              uint8_t bus_reserve, uint32_t io_limit,
>>>>> +                              uint16_t mem_limit, uint64_t
>>>>> pref_limit,
>>>>> +                              Error **errp)
>>>>> +{
>>>>> +    size_t cap_len = sizeof(PCIBridgeQemuCap);
>>>>> +    PCIBridgeQemuCap cap;
>>>>
>>>> This leaks info to guest. You want to init all fields here:
>>>>
>>>> cap = {
>>>>   .len = ....
>>>> };
>>>
>>> I surely can do this for len field, but as Laszlo proposed
>>> we can use mutually exclusive fields,
>>> e.g. pref_32 and pref_64, the only way I have left
>>> is to use ternary operator (if we surely need this
>>> big initializer). Keeping some if's would look better,
>>> I think.
>>>
>>>>
>>>>> +
>>>>> +    cap.len = cap_len;
>>>>> +    cap.bus_res = bus_reserve;
>>>>> +    cap.io_lim = io_limit & 0xFF;
>>>>> +    cap.io_lim_upper = io_limit >> 8 & 0xFFFF;
>>>>> +    cap.mem_lim = mem_limit;
>>>>> +    cap.pref_lim = pref_limit & 0xFFFF;
>>>>> +    cap.pref_lim_upper = pref_limit >> 16 & 0xFFFFFFFF;
>>>>
>>>> Please use pci_set_word etc or cpu_to_leXX.
>>>>
>>>
>>> Since now we've decided to avoid fields separation into <field> +
>>> <field_upper>,
>>> this bitmask along with pci_set_word are no longer needed.
>>>
>>>> I think it's easiest to replace struct with a set of macros then
>>>> pci_set_word does the work for you.
>>>>
>>>
>>> I don't really want to use macros here because structure
>>> show us the whole capability layout and this can
>>> decrease documenting efforts. More than that,
>>> memcpy usage is very convenient here, and I wouldn't like
>>> to lose it.
>>>
>>>>
>>>>> +
>>>>> +    int offset = pci_add_capability(dev, PCI_CAP_ID_VNDR,
>>>>> +                                    cap_offset, cap_len, errp);
>>>>> +    if (offset < 0) {
>>>>> +        return offset;
>>>>> +    }
>>>>> +
>>>>> +    memcpy(dev->config + offset + 2, (char *)&cap + 2, cap_len - 2);
>>>>
>>>> +2 is yacky. See how virtio does it:
>>>>
>>>>      memcpy(dev->config + offset + PCI_CAP_FLAGS, &cap->cap_len,
>>>>             cap->cap_len - PCI_CAP_FLAGS);
>>>>
>>>>
>>>
>>> OK.
>>>
>>>>> +    return 0;
>>>>> +}
>>>>> +
>>>>>   static const TypeInfo pci_bridge_type_info = {
>>>>>       .name = TYPE_PCI_BRIDGE,
>>>>>       .parent = TYPE_PCI_DEVICE,
>>>>> diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h
>>>>> index ff7cbaa..c9f642c 100644
>>>>> --- a/include/hw/pci/pci_bridge.h
>>>>> +++ b/include/hw/pci/pci_bridge.h
>>>>> @@ -67,4 +67,22 @@ void pci_bridge_map_irq(PCIBridge *br, const
>>>>> char* bus_name,
>>>>>   #define  PCI_BRIDGE_CTL_DISCARD_STATUS       0x400   /* Discard
>>>>> timer status */
>>>>>   #define  PCI_BRIDGE_CTL_DISCARD_SERR 0x800   /* Discard timer
>>>>> SERR# enable */
>>>>>
>>>>> +typedef struct PCIBridgeQemuCap {
>>>>> +    uint8_t id;     /* Standard PCI capability header field */
>>>>> +    uint8_t next;   /* Standard PCI capability header field */
>>>>> +    uint8_t len;    /* Standard PCI vendor-specific capability
>>>>> header field */
>>>>> +    uint8_t bus_res;
>>>>> +    uint32_t pref_lim_upper;
>>>>
>>>> Big endian? Ugh.
>>>>
>>>
>>> Agreed, and this's gonna to disappear with
>>> the new layout.
>>>
>>>>> +    uint16_t pref_lim;
>>>>> +    uint16_t mem_lim;
>>>>
>>>> I'd say we need 64 bit for memory.
>>>>
>>>
>>> Why? Non-prefetchable MEMORY_LIMIT register is 16 bits long.
>>
>> Hmm ok, but e.g. for io there are bridges that have extra registers
>> to specify non-standard non-aligned registers.
>>
>>>>> +    uint16_t io_lim_upper;
>>>>> +    uint8_t io_lim;
>>>>> +    uint8_t padding;
>>>>
>>>> IMHO each type should have a special "don't care" flag
>>>> that would mean "I don't know".
>>>>
>>>>
>>>
>>> Don't know what? Now 0 is an indicator to do nothing with this field.
>>
>> In that case how do you say "don't allocate any memory"?
>>
> 
> We can keep the MEM/Limit registers read-only for such cases,
> as they are optional registers.

For OVMF, it would be really nice if OVMF could gather the reservation
numbers with
- read-only accesses
- to the one exact hotplug controller (root port, bridge, etc) that's
  being queried for reservation.

Deciding whether some registers in config space are r/o would require
OVMF to attempt a write and check the result (and if it succeeded, to
restore the original value). I'm not too attracted to doing this in a
platform hook, while PciBusDxe is in the middle of enumerating /
configuring the PCI(e) hierarchy :)

Thanks
Laszlo
Michael S. Tsirkin July 28, 2017, 11:12 p.m. UTC | #10
On Thu, Jul 27, 2017 at 12:39:54PM +0300, Marcel Apfelbaum wrote:
> On 27/07/2017 2:28, Michael S. Tsirkin wrote:
> > On Thu, Jul 27, 2017 at 12:54:07AM +0300, Alexander Bezzubikov wrote:
> > > 2017-07-26 22:43 GMT+03:00 Michael S. Tsirkin <mst@redhat.com>:
> > > > On Sun, Jul 23, 2017 at 01:15:41AM +0300, Aleksandr Bezzubikov wrote:
> > > > > On PCI init PCI bridges may need some
> > > > > extra info about bus number to reserve, IO, memory and
> > > > > prefetchable memory limits. QEMU can provide this
> > > > > with special
> > > > 
> > > > with a special
> > > > 
> > > > > vendor-specific PCI capability.
> > > > > 
> > > > > Sizes of limits match ones from
> > > > > PCI Type 1 Configuration Space Header,
> > > > > number of buses to reserve occupies only 1 byte
> > > > > since it is the size of Subordinate Bus Number register.
> > > > > 
> > > > > Signed-off-by: Aleksandr Bezzubikov <zuban32s@gmail.com>
> > > > > ---
> > > > >   hw/pci/pci_bridge.c         | 27 +++++++++++++++++++++++++++
> > > > >   include/hw/pci/pci_bridge.h | 18 ++++++++++++++++++
> > > > >   2 files changed, 45 insertions(+)
> > > > > 
> > > > > diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
> > > > > index 720119b..8ec6c2c 100644
> > > > > --- a/hw/pci/pci_bridge.c
> > > > > +++ b/hw/pci/pci_bridge.c
> > > > > @@ -408,6 +408,33 @@ void pci_bridge_map_irq(PCIBridge *br, const char* bus_name,
> > > > >       br->bus_name = bus_name;
> > > > >   }
> > > > > 
> > > > > +
> > > > > +int pci_bridge_help_cap_init(PCIDevice *dev, int cap_offset,
> > > > 
> > > > help? should be qemu_cap_init?
> > > > 
> > > > > +                              uint8_t bus_reserve, uint32_t io_limit,
> > > > > +                              uint16_t mem_limit, uint64_t pref_limit,
> > > > > +                              Error **errp)
> > > > > +{
> > > > > +    size_t cap_len = sizeof(PCIBridgeQemuCap);
> > > > > +    PCIBridgeQemuCap cap;
> > > > 
> > > > This leaks info to guest. You want to init all fields here:
> > > > 
> > > > cap = {
> > > >   .len = ....
> > > > };
> > > 
> > > I surely can do this for len field, but as Laszlo proposed
> > > we can use mutually exclusive fields,
> > > e.g. pref_32 and pref_64, the only way I have left
> > > is to use ternary operator (if we surely need this
> > > big initializer). Keeping some if's would look better,
> > > I think.
> > > 
> > > > 
> > > > > +
> > > > > +    cap.len = cap_len;
> > > > > +    cap.bus_res = bus_reserve;
> > > > > +    cap.io_lim = io_limit & 0xFF;
> > > > > +    cap.io_lim_upper = io_limit >> 8 & 0xFFFF;
> > > > > +    cap.mem_lim = mem_limit;
> > > > > +    cap.pref_lim = pref_limit & 0xFFFF;
> > > > > +    cap.pref_lim_upper = pref_limit >> 16 & 0xFFFFFFFF;
> > > > 
> > > > Please use pci_set_word etc or cpu_to_leXX.
> > > > 
> > > 
> > > Since now we've decided to avoid fields separation into <field> + <field_upper>,
> > > this bitmask along with pci_set_word are no longer needed.
> > > 
> > > > I think it's easiest to replace struct with a set of macros then
> > > > pci_set_word does the work for you.
> > > > 
> > > 
> > > I don't really want to use macros here because structure
> > > show us the whole capability layout and this can
> > > decrease documenting efforts. More than that,
> > > memcpy usage is very convenient here, and I wouldn't like
> > > to lose it.
> > > 
> > > > 
> > > > > +
> > > > > +    int offset = pci_add_capability(dev, PCI_CAP_ID_VNDR,
> > > > > +                                    cap_offset, cap_len, errp);
> > > > > +    if (offset < 0) {
> > > > > +        return offset;
> > > > > +    }
> > > > > +
> > > > > +    memcpy(dev->config + offset + 2, (char *)&cap + 2, cap_len - 2);
> > > > 
> > > > +2 is yacky. See how virtio does it:
> > > > 
> > > >      memcpy(dev->config + offset + PCI_CAP_FLAGS, &cap->cap_len,
> > > >             cap->cap_len - PCI_CAP_FLAGS);
> > > > 
> > > > 
> > > 
> > > OK.
> > > 
> > > > > +    return 0;
> > > > > +}
> > > > > +
> > > > >   static const TypeInfo pci_bridge_type_info = {
> > > > >       .name = TYPE_PCI_BRIDGE,
> > > > >       .parent = TYPE_PCI_DEVICE,
> > > > > diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h
> > > > > index ff7cbaa..c9f642c 100644
> > > > > --- a/include/hw/pci/pci_bridge.h
> > > > > +++ b/include/hw/pci/pci_bridge.h
> > > > > @@ -67,4 +67,22 @@ void pci_bridge_map_irq(PCIBridge *br, const char* bus_name,
> > > > >   #define  PCI_BRIDGE_CTL_DISCARD_STATUS       0x400   /* Discard timer status */
> > > > >   #define  PCI_BRIDGE_CTL_DISCARD_SERR 0x800   /* Discard timer SERR# enable */
> > > > > 
> > > > > +typedef struct PCIBridgeQemuCap {
> > > > > +    uint8_t id;     /* Standard PCI capability header field */
> > > > > +    uint8_t next;   /* Standard PCI capability header field */
> > > > > +    uint8_t len;    /* Standard PCI vendor-specific capability header field */
> > > > > +    uint8_t bus_res;
> > > > > +    uint32_t pref_lim_upper;
> > > > 
> > > > Big endian? Ugh.
> > > > 
> > > 
> > > Agreed, and this's gonna to disappear with
> > > the new layout.
> > > 
> > > > > +    uint16_t pref_lim;
> > > > > +    uint16_t mem_lim;
> > > > 
> > > > I'd say we need 64 bit for memory.
> > > > 
> > > 
> > > Why? Non-prefetchable MEMORY_LIMIT register is 16 bits long.
> > 
> > Hmm ok, but e.g. for io there are bridges that have extra registers
> > to specify non-standard non-aligned registers.
> > 
> > > > > +    uint16_t io_lim_upper;
> > > > > +    uint8_t io_lim;
> > > > > +    uint8_t padding;
> > > > 
> > > > IMHO each type should have a special "don't care" flag
> > > > that would mean "I don't know".
> > > > 
> > > > 
> > > 
> > > Don't know what? Now 0 is an indicator to do nothing with this field.
> > 
> > In that case how do you say "don't allocate any memory"?
> > 
> 
> We can keep the MEM/Limit registers read-only for such cases,
> as they are optional registers.
> 
> Thanks,
> Marcel

I don't believe they are - from the spec (1.2):
	The Memory Base and Memory Limit registers are both required registers

Rest of ranges are indeed optional.


> > 
> > > > > +} PCIBridgeQemuCap;
> > > > 
> > > > You don't really need this struct in the header. And pls document all fields.
> > > > 
> > > > > +
> > > > > +int pci_bridge_help_cap_init(PCIDevice *dev, int cap_offset,
> > > > > +                              uint8_t bus_reserve, uint32_t io_limit,
> > > > > +                              uint16_t mem_limit, uint64_t pref_limit,
> > > > > +                              Error **errp);
> > > > > +
> > > > >   #endif /* QEMU_PCI_BRIDGE_H */
> > > > > --
> > > > > 2.7.4
> > > 
> > > 
> > > 
> > > --
> > > Alexander Bezzubikov
Michael S. Tsirkin July 28, 2017, 11:15 p.m. UTC | #11
On Thu, Jul 27, 2017 at 03:58:58PM +0200, Laszlo Ersek wrote:
> On 07/27/17 11:39, Marcel Apfelbaum wrote:
> > On 27/07/2017 2:28, Michael S. Tsirkin wrote:
> >> On Thu, Jul 27, 2017 at 12:54:07AM +0300, Alexander Bezzubikov wrote:
> >>> 2017-07-26 22:43 GMT+03:00 Michael S. Tsirkin <mst@redhat.com>:
> >>>> On Sun, Jul 23, 2017 at 01:15:41AM +0300, Aleksandr Bezzubikov wrote:
> >>>>> On PCI init PCI bridges may need some
> >>>>> extra info about bus number to reserve, IO, memory and
> >>>>> prefetchable memory limits. QEMU can provide this
> >>>>> with special
> >>>>
> >>>> with a special
> >>>>
> >>>>> vendor-specific PCI capability.
> >>>>>
> >>>>> Sizes of limits match ones from
> >>>>> PCI Type 1 Configuration Space Header,
> >>>>> number of buses to reserve occupies only 1 byte
> >>>>> since it is the size of Subordinate Bus Number register.
> >>>>>
> >>>>> Signed-off-by: Aleksandr Bezzubikov <zuban32s@gmail.com>
> >>>>> ---
> >>>>>   hw/pci/pci_bridge.c         | 27 +++++++++++++++++++++++++++
> >>>>>   include/hw/pci/pci_bridge.h | 18 ++++++++++++++++++
> >>>>>   2 files changed, 45 insertions(+)
> >>>>>
> >>>>> diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
> >>>>> index 720119b..8ec6c2c 100644
> >>>>> --- a/hw/pci/pci_bridge.c
> >>>>> +++ b/hw/pci/pci_bridge.c
> >>>>> @@ -408,6 +408,33 @@ void pci_bridge_map_irq(PCIBridge *br, const
> >>>>> char* bus_name,
> >>>>>       br->bus_name = bus_name;
> >>>>>   }
> >>>>>
> >>>>> +
> >>>>> +int pci_bridge_help_cap_init(PCIDevice *dev, int cap_offset,
> >>>>
> >>>> help? should be qemu_cap_init?
> >>>>
> >>>>> +                              uint8_t bus_reserve, uint32_t io_limit,
> >>>>> +                              uint16_t mem_limit, uint64_t
> >>>>> pref_limit,
> >>>>> +                              Error **errp)
> >>>>> +{
> >>>>> +    size_t cap_len = sizeof(PCIBridgeQemuCap);
> >>>>> +    PCIBridgeQemuCap cap;
> >>>>
> >>>> This leaks info to guest. You want to init all fields here:
> >>>>
> >>>> cap = {
> >>>>   .len = ....
> >>>> };
> >>>
> >>> I surely can do this for len field, but as Laszlo proposed
> >>> we can use mutually exclusive fields,
> >>> e.g. pref_32 and pref_64, the only way I have left
> >>> is to use ternary operator (if we surely need this
> >>> big initializer). Keeping some if's would look better,
> >>> I think.
> >>>
> >>>>
> >>>>> +
> >>>>> +    cap.len = cap_len;
> >>>>> +    cap.bus_res = bus_reserve;
> >>>>> +    cap.io_lim = io_limit & 0xFF;
> >>>>> +    cap.io_lim_upper = io_limit >> 8 & 0xFFFF;
> >>>>> +    cap.mem_lim = mem_limit;
> >>>>> +    cap.pref_lim = pref_limit & 0xFFFF;
> >>>>> +    cap.pref_lim_upper = pref_limit >> 16 & 0xFFFFFFFF;
> >>>>
> >>>> Please use pci_set_word etc or cpu_to_leXX.
> >>>>
> >>>
> >>> Since now we've decided to avoid fields separation into <field> +
> >>> <field_upper>,
> >>> this bitmask along with pci_set_word are no longer needed.
> >>>
> >>>> I think it's easiest to replace struct with a set of macros then
> >>>> pci_set_word does the work for you.
> >>>>
> >>>
> >>> I don't really want to use macros here because structure
> >>> show us the whole capability layout and this can
> >>> decrease documenting efforts. More than that,
> >>> memcpy usage is very convenient here, and I wouldn't like
> >>> to lose it.
> >>>
> >>>>
> >>>>> +
> >>>>> +    int offset = pci_add_capability(dev, PCI_CAP_ID_VNDR,
> >>>>> +                                    cap_offset, cap_len, errp);
> >>>>> +    if (offset < 0) {
> >>>>> +        return offset;
> >>>>> +    }
> >>>>> +
> >>>>> +    memcpy(dev->config + offset + 2, (char *)&cap + 2, cap_len - 2);
> >>>>
> >>>> +2 is yacky. See how virtio does it:
> >>>>
> >>>>      memcpy(dev->config + offset + PCI_CAP_FLAGS, &cap->cap_len,
> >>>>             cap->cap_len - PCI_CAP_FLAGS);
> >>>>
> >>>>
> >>>
> >>> OK.
> >>>
> >>>>> +    return 0;
> >>>>> +}
> >>>>> +
> >>>>>   static const TypeInfo pci_bridge_type_info = {
> >>>>>       .name = TYPE_PCI_BRIDGE,
> >>>>>       .parent = TYPE_PCI_DEVICE,
> >>>>> diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h
> >>>>> index ff7cbaa..c9f642c 100644
> >>>>> --- a/include/hw/pci/pci_bridge.h
> >>>>> +++ b/include/hw/pci/pci_bridge.h
> >>>>> @@ -67,4 +67,22 @@ void pci_bridge_map_irq(PCIBridge *br, const
> >>>>> char* bus_name,
> >>>>>   #define  PCI_BRIDGE_CTL_DISCARD_STATUS       0x400   /* Discard
> >>>>> timer status */
> >>>>>   #define  PCI_BRIDGE_CTL_DISCARD_SERR 0x800   /* Discard timer
> >>>>> SERR# enable */
> >>>>>
> >>>>> +typedef struct PCIBridgeQemuCap {
> >>>>> +    uint8_t id;     /* Standard PCI capability header field */
> >>>>> +    uint8_t next;   /* Standard PCI capability header field */
> >>>>> +    uint8_t len;    /* Standard PCI vendor-specific capability
> >>>>> header field */
> >>>>> +    uint8_t bus_res;
> >>>>> +    uint32_t pref_lim_upper;
> >>>>
> >>>> Big endian? Ugh.
> >>>>
> >>>
> >>> Agreed, and this's gonna to disappear with
> >>> the new layout.
> >>>
> >>>>> +    uint16_t pref_lim;
> >>>>> +    uint16_t mem_lim;
> >>>>
> >>>> I'd say we need 64 bit for memory.
> >>>>
> >>>
> >>> Why? Non-prefetchable MEMORY_LIMIT register is 16 bits long.
> >>
> >> Hmm ok, but e.g. for io there are bridges that have extra registers
> >> to specify non-standard non-aligned registers.
> >>
> >>>>> +    uint16_t io_lim_upper;
> >>>>> +    uint8_t io_lim;
> >>>>> +    uint8_t padding;
> >>>>
> >>>> IMHO each type should have a special "don't care" flag
> >>>> that would mean "I don't know".
> >>>>
> >>>>
> >>>
> >>> Don't know what? Now 0 is an indicator to do nothing with this field.
> >>
> >> In that case how do you say "don't allocate any memory"?
> >>
> > 
> > We can keep the MEM/Limit registers read-only for such cases,
> > as they are optional registers.
> 
> For OVMF, it would be really nice if OVMF could gather the reservation
> numbers with
> - read-only accesses
> - to the one exact hotplug controller (root port, bridge, etc) that's
>   being queried for reservation.
> 
> Deciding whether some registers in config space are r/o would require
> OVMF to attempt a write and check the result (and if it succeeded, to
> restore the original value). I'm not too attracted to doing this in a
> platform hook, while PciBusDxe is in the middle of enumerating /
> configuring the PCI(e) hierarchy :)
> 
> Thanks
> Laszlo

Well that's the PCI spec, I don't think there's a choice for
regular bridges. If we have this capability this is a possible
optimization.
But without it, if you assign ranges without checking whether they are
available they won't have any effect practically.
It's mostly harmless, but you are going to waste resources.
Marcel Apfelbaum July 31, 2017, 10:06 a.m. UTC | #12
On 29/07/2017 2:12, Michael S. Tsirkin wrote:
> On Thu, Jul 27, 2017 at 12:39:54PM +0300, Marcel Apfelbaum wrote:
>> On 27/07/2017 2:28, Michael S. Tsirkin wrote:
>>> On Thu, Jul 27, 2017 at 12:54:07AM +0300, Alexander Bezzubikov wrote:
>>>> 2017-07-26 22:43 GMT+03:00 Michael S. Tsirkin <mst@redhat.com>:
>>>>> On Sun, Jul 23, 2017 at 01:15:41AM +0300, Aleksandr Bezzubikov wrote:
>>>>>> On PCI init PCI bridges may need some
>>>>>> extra info about bus number to reserve, IO, memory and
>>>>>> prefetchable memory limits. QEMU can provide this
>>>>>> with special
>>>>>
>>>>> with a special
>>>>>
>>>>>> vendor-specific PCI capability.
>>>>>>
>>>>>> Sizes of limits match ones from
>>>>>> PCI Type 1 Configuration Space Header,
>>>>>> number of buses to reserve occupies only 1 byte
>>>>>> since it is the size of Subordinate Bus Number register.
>>>>>>
>>>>>> Signed-off-by: Aleksandr Bezzubikov <zuban32s@gmail.com>
>>>>>> ---
>>>>>>    hw/pci/pci_bridge.c         | 27 +++++++++++++++++++++++++++
>>>>>>    include/hw/pci/pci_bridge.h | 18 ++++++++++++++++++
>>>>>>    2 files changed, 45 insertions(+)
>>>>>>
>>>>>> diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
>>>>>> index 720119b..8ec6c2c 100644
>>>>>> --- a/hw/pci/pci_bridge.c
>>>>>> +++ b/hw/pci/pci_bridge.c
>>>>>> @@ -408,6 +408,33 @@ void pci_bridge_map_irq(PCIBridge *br, const char* bus_name,
>>>>>>        br->bus_name = bus_name;
>>>>>>    }
>>>>>>
>>>>>> +
>>>>>> +int pci_bridge_help_cap_init(PCIDevice *dev, int cap_offset,
>>>>>
>>>>> help? should be qemu_cap_init?
>>>>>
>>>>>> +                              uint8_t bus_reserve, uint32_t io_limit,
>>>>>> +                              uint16_t mem_limit, uint64_t pref_limit,
>>>>>> +                              Error **errp)
>>>>>> +{
>>>>>> +    size_t cap_len = sizeof(PCIBridgeQemuCap);
>>>>>> +    PCIBridgeQemuCap cap;
>>>>>
>>>>> This leaks info to guest. You want to init all fields here:
>>>>>
>>>>> cap = {
>>>>>    .len = ....
>>>>> };
>>>>
>>>> I surely can do this for len field, but as Laszlo proposed
>>>> we can use mutually exclusive fields,
>>>> e.g. pref_32 and pref_64, the only way I have left
>>>> is to use ternary operator (if we surely need this
>>>> big initializer). Keeping some if's would look better,
>>>> I think.
>>>>
>>>>>
>>>>>> +
>>>>>> +    cap.len = cap_len;
>>>>>> +    cap.bus_res = bus_reserve;
>>>>>> +    cap.io_lim = io_limit & 0xFF;
>>>>>> +    cap.io_lim_upper = io_limit >> 8 & 0xFFFF;
>>>>>> +    cap.mem_lim = mem_limit;
>>>>>> +    cap.pref_lim = pref_limit & 0xFFFF;
>>>>>> +    cap.pref_lim_upper = pref_limit >> 16 & 0xFFFFFFFF;
>>>>>
>>>>> Please use pci_set_word etc or cpu_to_leXX.
>>>>>
>>>>
>>>> Since now we've decided to avoid fields separation into <field> + <field_upper>,
>>>> this bitmask along with pci_set_word are no longer needed.
>>>>
>>>>> I think it's easiest to replace struct with a set of macros then
>>>>> pci_set_word does the work for you.
>>>>>
>>>>
>>>> I don't really want to use macros here because structure
>>>> show us the whole capability layout and this can
>>>> decrease documenting efforts. More than that,
>>>> memcpy usage is very convenient here, and I wouldn't like
>>>> to lose it.
>>>>
>>>>>
>>>>>> +
>>>>>> +    int offset = pci_add_capability(dev, PCI_CAP_ID_VNDR,
>>>>>> +                                    cap_offset, cap_len, errp);
>>>>>> +    if (offset < 0) {
>>>>>> +        return offset;
>>>>>> +    }
>>>>>> +
>>>>>> +    memcpy(dev->config + offset + 2, (char *)&cap + 2, cap_len - 2);
>>>>>
>>>>> +2 is yacky. See how virtio does it:
>>>>>
>>>>>       memcpy(dev->config + offset + PCI_CAP_FLAGS, &cap->cap_len,
>>>>>              cap->cap_len - PCI_CAP_FLAGS);
>>>>>
>>>>>
>>>>
>>>> OK.
>>>>
>>>>>> +    return 0;
>>>>>> +}
>>>>>> +
>>>>>>    static const TypeInfo pci_bridge_type_info = {
>>>>>>        .name = TYPE_PCI_BRIDGE,
>>>>>>        .parent = TYPE_PCI_DEVICE,
>>>>>> diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h
>>>>>> index ff7cbaa..c9f642c 100644
>>>>>> --- a/include/hw/pci/pci_bridge.h
>>>>>> +++ b/include/hw/pci/pci_bridge.h
>>>>>> @@ -67,4 +67,22 @@ void pci_bridge_map_irq(PCIBridge *br, const char* bus_name,
>>>>>>    #define  PCI_BRIDGE_CTL_DISCARD_STATUS       0x400   /* Discard timer status */
>>>>>>    #define  PCI_BRIDGE_CTL_DISCARD_SERR 0x800   /* Discard timer SERR# enable */
>>>>>>
>>>>>> +typedef struct PCIBridgeQemuCap {
>>>>>> +    uint8_t id;     /* Standard PCI capability header field */
>>>>>> +    uint8_t next;   /* Standard PCI capability header field */
>>>>>> +    uint8_t len;    /* Standard PCI vendor-specific capability header field */
>>>>>> +    uint8_t bus_res;
>>>>>> +    uint32_t pref_lim_upper;
>>>>>
>>>>> Big endian? Ugh.
>>>>>
>>>>
>>>> Agreed, and this's gonna to disappear with
>>>> the new layout.
>>>>
>>>>>> +    uint16_t pref_lim;
>>>>>> +    uint16_t mem_lim;
>>>>>
>>>>> I'd say we need 64 bit for memory.
>>>>>
>>>>
>>>> Why? Non-prefetchable MEMORY_LIMIT register is 16 bits long.
>>>
>>> Hmm ok, but e.g. for io there are bridges that have extra registers
>>> to specify non-standard non-aligned registers.
>>>
>>>>>> +    uint16_t io_lim_upper;
>>>>>> +    uint8_t io_lim;
>>>>>> +    uint8_t padding;
>>>>>
>>>>> IMHO each type should have a special "don't care" flag
>>>>> that would mean "I don't know".
>>>>>
>>>>>
>>>>
>>>> Don't know what? Now 0 is an indicator to do nothing with this field.
>>>
>>> In that case how do you say "don't allocate any memory"?
>>>
>>
>> We can keep the MEM/Limit registers read-only for such cases,
>> as they are optional registers.
>>
>> Thanks,
>> Marcel
>

Hi Michael,


> I don't believe they are - from the spec (1.2):
> 	The Memory Base and Memory Limit registers are both required registers
> 
> Rest of ranges are indeed optional.
> 


Oops, my mistake, I looked at prefetchable memory, sorry for the mix-up.
I wonder what would be the use case to disable IOMEM.

Anyway, we can have a "-1" default value giving the hint: do not reserve
anything, but in this case we should use it also for IO/prefetch mem.
The problem is, the guest OS can try to allocate IO space even if
the firmware didn't; Linux does it today.
Having IO-base/IO-limit registers read-only will solve the problem.

What am I getting at is we would have now 2 ways to solve the same
problem and the hint would be inferior.

I would use the hint to reserve a specific range and read only registers
to disable the range. IOMEM cannot be disabled in this way, but
should not bother us.

Thanks,
Marcel

> 
>>>
>>>>>> +} PCIBridgeQemuCap;
>>>>>
>>>>> You don't really need this struct in the header. And pls document all fields.
>>>>>
>>>>>> +
>>>>>> +int pci_bridge_help_cap_init(PCIDevice *dev, int cap_offset,
>>>>>> +                              uint8_t bus_reserve, uint32_t io_limit,
>>>>>> +                              uint16_t mem_limit, uint64_t pref_limit,
>>>>>> +                              Error **errp);
>>>>>> +
>>>>>>    #endif /* QEMU_PCI_BRIDGE_H */
>>>>>> --
>>>>>> 2.7.4
>>>>
>>>>
>>>>
>>>> --
>>>> Alexander Bezzubikov
Laszlo Ersek July 31, 2017, 6:16 p.m. UTC | #13
On 07/29/17 01:15, Michael S. Tsirkin wrote:
> On Thu, Jul 27, 2017 at 03:58:58PM +0200, Laszlo Ersek wrote:
>> On 07/27/17 11:39, Marcel Apfelbaum wrote:
>>> On 27/07/2017 2:28, Michael S. Tsirkin wrote:
>>>> On Thu, Jul 27, 2017 at 12:54:07AM +0300, Alexander Bezzubikov wrote:
>>>>> 2017-07-26 22:43 GMT+03:00 Michael S. Tsirkin <mst@redhat.com>:
>>>>>> On Sun, Jul 23, 2017 at 01:15:41AM +0300, Aleksandr Bezzubikov wrote:
>>>>>>> On PCI init PCI bridges may need some
>>>>>>> extra info about bus number to reserve, IO, memory and
>>>>>>> prefetchable memory limits. QEMU can provide this
>>>>>>> with special
>>>>>>
>>>>>> with a special
>>>>>>
>>>>>>> vendor-specific PCI capability.
>>>>>>>
>>>>>>> Sizes of limits match ones from
>>>>>>> PCI Type 1 Configuration Space Header,
>>>>>>> number of buses to reserve occupies only 1 byte
>>>>>>> since it is the size of Subordinate Bus Number register.
>>>>>>>
>>>>>>> Signed-off-by: Aleksandr Bezzubikov <zuban32s@gmail.com>
>>>>>>> ---
>>>>>>>   hw/pci/pci_bridge.c         | 27 +++++++++++++++++++++++++++
>>>>>>>   include/hw/pci/pci_bridge.h | 18 ++++++++++++++++++
>>>>>>>   2 files changed, 45 insertions(+)
>>>>>>>
>>>>>>> diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
>>>>>>> index 720119b..8ec6c2c 100644
>>>>>>> --- a/hw/pci/pci_bridge.c
>>>>>>> +++ b/hw/pci/pci_bridge.c
>>>>>>> @@ -408,6 +408,33 @@ void pci_bridge_map_irq(PCIBridge *br, const
>>>>>>> char* bus_name,
>>>>>>>       br->bus_name = bus_name;
>>>>>>>   }
>>>>>>>
>>>>>>> +
>>>>>>> +int pci_bridge_help_cap_init(PCIDevice *dev, int cap_offset,
>>>>>>
>>>>>> help? should be qemu_cap_init?
>>>>>>
>>>>>>> +                              uint8_t bus_reserve, uint32_t io_limit,
>>>>>>> +                              uint16_t mem_limit, uint64_t
>>>>>>> pref_limit,
>>>>>>> +                              Error **errp)
>>>>>>> +{
>>>>>>> +    size_t cap_len = sizeof(PCIBridgeQemuCap);
>>>>>>> +    PCIBridgeQemuCap cap;
>>>>>>
>>>>>> This leaks info to guest. You want to init all fields here:
>>>>>>
>>>>>> cap = {
>>>>>>   .len = ....
>>>>>> };
>>>>>
>>>>> I surely can do this for len field, but as Laszlo proposed
>>>>> we can use mutually exclusive fields,
>>>>> e.g. pref_32 and pref_64, the only way I have left
>>>>> is to use ternary operator (if we surely need this
>>>>> big initializer). Keeping some if's would look better,
>>>>> I think.
>>>>>
>>>>>>
>>>>>>> +
>>>>>>> +    cap.len = cap_len;
>>>>>>> +    cap.bus_res = bus_reserve;
>>>>>>> +    cap.io_lim = io_limit & 0xFF;
>>>>>>> +    cap.io_lim_upper = io_limit >> 8 & 0xFFFF;
>>>>>>> +    cap.mem_lim = mem_limit;
>>>>>>> +    cap.pref_lim = pref_limit & 0xFFFF;
>>>>>>> +    cap.pref_lim_upper = pref_limit >> 16 & 0xFFFFFFFF;
>>>>>>
>>>>>> Please use pci_set_word etc or cpu_to_leXX.
>>>>>>
>>>>>
>>>>> Since now we've decided to avoid fields separation into <field> +
>>>>> <field_upper>,
>>>>> this bitmask along with pci_set_word are no longer needed.
>>>>>
>>>>>> I think it's easiest to replace struct with a set of macros then
>>>>>> pci_set_word does the work for you.
>>>>>>
>>>>>
>>>>> I don't really want to use macros here because structure
>>>>> show us the whole capability layout and this can
>>>>> decrease documenting efforts. More than that,
>>>>> memcpy usage is very convenient here, and I wouldn't like
>>>>> to lose it.
>>>>>
>>>>>>
>>>>>>> +
>>>>>>> +    int offset = pci_add_capability(dev, PCI_CAP_ID_VNDR,
>>>>>>> +                                    cap_offset, cap_len, errp);
>>>>>>> +    if (offset < 0) {
>>>>>>> +        return offset;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    memcpy(dev->config + offset + 2, (char *)&cap + 2, cap_len - 2);
>>>>>>
>>>>>> +2 is yacky. See how virtio does it:
>>>>>>
>>>>>>      memcpy(dev->config + offset + PCI_CAP_FLAGS, &cap->cap_len,
>>>>>>             cap->cap_len - PCI_CAP_FLAGS);
>>>>>>
>>>>>>
>>>>>
>>>>> OK.
>>>>>
>>>>>>> +    return 0;
>>>>>>> +}
>>>>>>> +
>>>>>>>   static const TypeInfo pci_bridge_type_info = {
>>>>>>>       .name = TYPE_PCI_BRIDGE,
>>>>>>>       .parent = TYPE_PCI_DEVICE,
>>>>>>> diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h
>>>>>>> index ff7cbaa..c9f642c 100644
>>>>>>> --- a/include/hw/pci/pci_bridge.h
>>>>>>> +++ b/include/hw/pci/pci_bridge.h
>>>>>>> @@ -67,4 +67,22 @@ void pci_bridge_map_irq(PCIBridge *br, const
>>>>>>> char* bus_name,
>>>>>>>   #define  PCI_BRIDGE_CTL_DISCARD_STATUS       0x400   /* Discard
>>>>>>> timer status */
>>>>>>>   #define  PCI_BRIDGE_CTL_DISCARD_SERR 0x800   /* Discard timer
>>>>>>> SERR# enable */
>>>>>>>
>>>>>>> +typedef struct PCIBridgeQemuCap {
>>>>>>> +    uint8_t id;     /* Standard PCI capability header field */
>>>>>>> +    uint8_t next;   /* Standard PCI capability header field */
>>>>>>> +    uint8_t len;    /* Standard PCI vendor-specific capability
>>>>>>> header field */
>>>>>>> +    uint8_t bus_res;
>>>>>>> +    uint32_t pref_lim_upper;
>>>>>>
>>>>>> Big endian? Ugh.
>>>>>>
>>>>>
>>>>> Agreed, and this's gonna to disappear with
>>>>> the new layout.
>>>>>
>>>>>>> +    uint16_t pref_lim;
>>>>>>> +    uint16_t mem_lim;
>>>>>>
>>>>>> I'd say we need 64 bit for memory.
>>>>>>
>>>>>
>>>>> Why? Non-prefetchable MEMORY_LIMIT register is 16 bits long.
>>>>
>>>> Hmm ok, but e.g. for io there are bridges that have extra registers
>>>> to specify non-standard non-aligned registers.
>>>>
>>>>>>> +    uint16_t io_lim_upper;
>>>>>>> +    uint8_t io_lim;
>>>>>>> +    uint8_t padding;
>>>>>>
>>>>>> IMHO each type should have a special "don't care" flag
>>>>>> that would mean "I don't know".
>>>>>>
>>>>>>
>>>>>
>>>>> Don't know what? Now 0 is an indicator to do nothing with this field.
>>>>
>>>> In that case how do you say "don't allocate any memory"?
>>>>
>>>
>>> We can keep the MEM/Limit registers read-only for such cases,
>>> as they are optional registers.
>>
>> For OVMF, it would be really nice if OVMF could gather the reservation
>> numbers with
>> - read-only accesses
>> - to the one exact hotplug controller (root port, bridge, etc) that's
>>   being queried for reservation.
>>
>> Deciding whether some registers in config space are r/o would require
>> OVMF to attempt a write and check the result (and if it succeeded, to
>> restore the original value). I'm not too attracted to doing this in a
>> platform hook, while PciBusDxe is in the middle of enumerating /
>> configuring the PCI(e) hierarchy :)
>>
>> Thanks
>> Laszlo
> 
> Well that's the PCI spec, I don't think there's a choice for
> regular bridges. If we have this capability this is a possible
> optimization.
> But without it, if you assign ranges without checking whether they are
> available they won't have any effect practically.
> It's mostly harmless, but you are going to waste resources.
> 

OK. If the proposed solution with the r/o mem base/limit registers is
rooted in the spec (and I think it indeed must be; apparently this would
be the same as what we're already planning for IO disablement), then
that's a strong argument for PciBusDxe to accommodate this probing in
the platform hook.

Thanks
Laszlo
Michael S. Tsirkin July 31, 2017, 6:36 p.m. UTC | #14
On Mon, Jul 31, 2017 at 01:06:23PM +0300, Marcel Apfelbaum wrote:
> On 29/07/2017 2:12, Michael S. Tsirkin wrote:
> > On Thu, Jul 27, 2017 at 12:39:54PM +0300, Marcel Apfelbaum wrote:
> > > On 27/07/2017 2:28, Michael S. Tsirkin wrote:
> > > > On Thu, Jul 27, 2017 at 12:54:07AM +0300, Alexander Bezzubikov wrote:
> > > > > 2017-07-26 22:43 GMT+03:00 Michael S. Tsirkin <mst@redhat.com>:
> > > > > > On Sun, Jul 23, 2017 at 01:15:41AM +0300, Aleksandr Bezzubikov wrote:
> > > > > > > On PCI init PCI bridges may need some
> > > > > > > extra info about bus number to reserve, IO, memory and
> > > > > > > prefetchable memory limits. QEMU can provide this
> > > > > > > with special
> > > > > > 
> > > > > > with a special
> > > > > > 
> > > > > > > vendor-specific PCI capability.
> > > > > > > 
> > > > > > > Sizes of limits match ones from
> > > > > > > PCI Type 1 Configuration Space Header,
> > > > > > > number of buses to reserve occupies only 1 byte
> > > > > > > since it is the size of Subordinate Bus Number register.
> > > > > > > 
> > > > > > > Signed-off-by: Aleksandr Bezzubikov <zuban32s@gmail.com>
> > > > > > > ---
> > > > > > >    hw/pci/pci_bridge.c         | 27 +++++++++++++++++++++++++++
> > > > > > >    include/hw/pci/pci_bridge.h | 18 ++++++++++++++++++
> > > > > > >    2 files changed, 45 insertions(+)
> > > > > > > 
> > > > > > > diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
> > > > > > > index 720119b..8ec6c2c 100644
> > > > > > > --- a/hw/pci/pci_bridge.c
> > > > > > > +++ b/hw/pci/pci_bridge.c
> > > > > > > @@ -408,6 +408,33 @@ void pci_bridge_map_irq(PCIBridge *br, const char* bus_name,
> > > > > > >        br->bus_name = bus_name;
> > > > > > >    }
> > > > > > > 
> > > > > > > +
> > > > > > > +int pci_bridge_help_cap_init(PCIDevice *dev, int cap_offset,
> > > > > > 
> > > > > > help? should be qemu_cap_init?
> > > > > > 
> > > > > > > +                              uint8_t bus_reserve, uint32_t io_limit,
> > > > > > > +                              uint16_t mem_limit, uint64_t pref_limit,
> > > > > > > +                              Error **errp)
> > > > > > > +{
> > > > > > > +    size_t cap_len = sizeof(PCIBridgeQemuCap);
> > > > > > > +    PCIBridgeQemuCap cap;
> > > > > > 
> > > > > > This leaks info to guest. You want to init all fields here:
> > > > > > 
> > > > > > cap = {
> > > > > >    .len = ....
> > > > > > };
> > > > > 
> > > > > I surely can do this for len field, but as Laszlo proposed
> > > > > we can use mutually exclusive fields,
> > > > > e.g. pref_32 and pref_64, the only way I have left
> > > > > is to use ternary operator (if we surely need this
> > > > > big initializer). Keeping some if's would look better,
> > > > > I think.
> > > > > 
> > > > > > 
> > > > > > > +
> > > > > > > +    cap.len = cap_len;
> > > > > > > +    cap.bus_res = bus_reserve;
> > > > > > > +    cap.io_lim = io_limit & 0xFF;
> > > > > > > +    cap.io_lim_upper = io_limit >> 8 & 0xFFFF;
> > > > > > > +    cap.mem_lim = mem_limit;
> > > > > > > +    cap.pref_lim = pref_limit & 0xFFFF;
> > > > > > > +    cap.pref_lim_upper = pref_limit >> 16 & 0xFFFFFFFF;
> > > > > > 
> > > > > > Please use pci_set_word etc or cpu_to_leXX.
> > > > > > 
> > > > > 
> > > > > Since now we've decided to avoid fields separation into <field> + <field_upper>,
> > > > > this bitmask along with pci_set_word are no longer needed.
> > > > > 
> > > > > > I think it's easiest to replace struct with a set of macros then
> > > > > > pci_set_word does the work for you.
> > > > > > 
> > > > > 
> > > > > I don't really want to use macros here because structure
> > > > > show us the whole capability layout and this can
> > > > > decrease documenting efforts. More than that,
> > > > > memcpy usage is very convenient here, and I wouldn't like
> > > > > to lose it.
> > > > > 
> > > > > > 
> > > > > > > +
> > > > > > > +    int offset = pci_add_capability(dev, PCI_CAP_ID_VNDR,
> > > > > > > +                                    cap_offset, cap_len, errp);
> > > > > > > +    if (offset < 0) {
> > > > > > > +        return offset;
> > > > > > > +    }
> > > > > > > +
> > > > > > > +    memcpy(dev->config + offset + 2, (char *)&cap + 2, cap_len - 2);
> > > > > > 
> > > > > > +2 is yacky. See how virtio does it:
> > > > > > 
> > > > > >       memcpy(dev->config + offset + PCI_CAP_FLAGS, &cap->cap_len,
> > > > > >              cap->cap_len - PCI_CAP_FLAGS);
> > > > > > 
> > > > > > 
> > > > > 
> > > > > OK.
> > > > > 
> > > > > > > +    return 0;
> > > > > > > +}
> > > > > > > +
> > > > > > >    static const TypeInfo pci_bridge_type_info = {
> > > > > > >        .name = TYPE_PCI_BRIDGE,
> > > > > > >        .parent = TYPE_PCI_DEVICE,
> > > > > > > diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h
> > > > > > > index ff7cbaa..c9f642c 100644
> > > > > > > --- a/include/hw/pci/pci_bridge.h
> > > > > > > +++ b/include/hw/pci/pci_bridge.h
> > > > > > > @@ -67,4 +67,22 @@ void pci_bridge_map_irq(PCIBridge *br, const char* bus_name,
> > > > > > >    #define  PCI_BRIDGE_CTL_DISCARD_STATUS       0x400   /* Discard timer status */
> > > > > > >    #define  PCI_BRIDGE_CTL_DISCARD_SERR 0x800   /* Discard timer SERR# enable */
> > > > > > > 
> > > > > > > +typedef struct PCIBridgeQemuCap {
> > > > > > > +    uint8_t id;     /* Standard PCI capability header field */
> > > > > > > +    uint8_t next;   /* Standard PCI capability header field */
> > > > > > > +    uint8_t len;    /* Standard PCI vendor-specific capability header field */
> > > > > > > +    uint8_t bus_res;
> > > > > > > +    uint32_t pref_lim_upper;
> > > > > > 
> > > > > > Big endian? Ugh.
> > > > > > 
> > > > > 
> > > > > Agreed, and this's gonna to disappear with
> > > > > the new layout.
> > > > > 
> > > > > > > +    uint16_t pref_lim;
> > > > > > > +    uint16_t mem_lim;
> > > > > > 
> > > > > > I'd say we need 64 bit for memory.
> > > > > > 
> > > > > 
> > > > > Why? Non-prefetchable MEMORY_LIMIT register is 16 bits long.
> > > > 
> > > > Hmm ok, but e.g. for io there are bridges that have extra registers
> > > > to specify non-standard non-aligned registers.
> > > > 
> > > > > > > +    uint16_t io_lim_upper;
> > > > > > > +    uint8_t io_lim;
> > > > > > > +    uint8_t padding;
> > > > > > 
> > > > > > IMHO each type should have a special "don't care" flag
> > > > > > that would mean "I don't know".
> > > > > > 
> > > > > > 
> > > > > 
> > > > > Don't know what? Now 0 is an indicator to do nothing with this field.
> > > > 
> > > > In that case how do you say "don't allocate any memory"?
> > > > 
> > > 
> > > We can keep the MEM/Limit registers read-only for such cases,
> > > as they are optional registers.
> > > 
> > > Thanks,
> > > Marcel
> > 
> 
> Hi Michael,
> 
> 
> > I don't believe they are - from the spec (1.2):
> > 	The Memory Base and Memory Limit registers are both required registers
> > 
> > Rest of ranges are indeed optional.
> > 
> 
> 
> Oops, my mistake, I looked at prefetchable memory, sorry for the mix-up.
> I wonder what would be the use case to disable IOMEM.
> 
> Anyway, we can have a "-1" default value giving the hint: do not reserve
> anything, but in this case we should use it also for IO/prefetch mem.
> The problem is, the guest OS can try to allocate IO space even if
> the firmware didn't; Linux does it today.
> Having IO-base/IO-limit registers read-only will solve the problem.

Makes sense.

> What am I getting at is we would have now 2 ways to solve the same
> problem and the hint would be inferior.
> 
> I would use the hint to reserve a specific range and read only registers
> to disable the range. IOMEM cannot be disabled in this way, but
> should not bother us.
> 
> Thanks,
> Marcel

I think Laszlo said that RO as an interface to indicate optional
range is tricky to use.

> > 
> > > > 
> > > > > > > +} PCIBridgeQemuCap;
> > > > > > 
> > > > > > You don't really need this struct in the header. And pls document all fields.
> > > > > > 
> > > > > > > +
> > > > > > > +int pci_bridge_help_cap_init(PCIDevice *dev, int cap_offset,
> > > > > > > +                              uint8_t bus_reserve, uint32_t io_limit,
> > > > > > > +                              uint16_t mem_limit, uint64_t pref_limit,
> > > > > > > +                              Error **errp);
> > > > > > > +
> > > > > > >    #endif /* QEMU_PCI_BRIDGE_H */
> > > > > > > --
> > > > > > > 2.7.4
> > > > > 
> > > > > 
> > > > > 
> > > > > --
> > > > > Alexander Bezzubikov
Michael S. Tsirkin July 31, 2017, 6:55 p.m. UTC | #15
On Mon, Jul 31, 2017 at 08:16:49PM +0200, Laszlo Ersek wrote:
> OK. If the proposed solution with the r/o mem base/limit registers is
> rooted in the spec (and I think it indeed must be; apparently this would
> be the same as what we're already planning for IO disablement), then
> that's a strong argument for PciBusDxe to accommodate this probing in
> the platform hook.
> 
> Thanks
> Laszlo

Do you mean making base/limit read-only?
Laszlo Ersek July 31, 2017, 7:04 p.m. UTC | #16
On 07/31/17 20:55, Michael S. Tsirkin wrote:
> On Mon, Jul 31, 2017 at 08:16:49PM +0200, Laszlo Ersek wrote:
>> OK. If the proposed solution with the r/o mem base/limit registers is
>> rooted in the spec (and I think it indeed must be; apparently this would
>> be the same as what we're already planning for IO disablement), then
>> that's a strong argument for PciBusDxe to accommodate this probing in
>> the platform hook.
>>
>> Thanks
>> Laszlo
> 
> Do you mean making base/limit read-only?

Yes, I do. (Perhaps writing "r/o" was too terse.)

Thanks
Laszlo
diff mbox

Patch

diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
index 720119b..8ec6c2c 100644
--- a/hw/pci/pci_bridge.c
+++ b/hw/pci/pci_bridge.c
@@ -408,6 +408,33 @@  void pci_bridge_map_irq(PCIBridge *br, const char* bus_name,
     br->bus_name = bus_name;
 }
 
+
+int pci_bridge_help_cap_init(PCIDevice *dev, int cap_offset,
+                              uint8_t bus_reserve, uint32_t io_limit,
+                              uint16_t mem_limit, uint64_t pref_limit,
+                              Error **errp)
+{
+    size_t cap_len = sizeof(PCIBridgeQemuCap);
+    PCIBridgeQemuCap cap;
+
+    cap.len = cap_len;
+    cap.bus_res = bus_reserve;
+    cap.io_lim = io_limit & 0xFF;
+    cap.io_lim_upper = io_limit >> 8 & 0xFFFF;
+    cap.mem_lim = mem_limit;
+    cap.pref_lim = pref_limit & 0xFFFF;
+    cap.pref_lim_upper = pref_limit >> 16 & 0xFFFFFFFF;
+
+    int offset = pci_add_capability(dev, PCI_CAP_ID_VNDR,
+                                    cap_offset, cap_len, errp);
+    if (offset < 0) {
+        return offset;
+    }
+
+    memcpy(dev->config + offset + 2, (char *)&cap + 2, cap_len - 2);
+    return 0;
+}
+
 static const TypeInfo pci_bridge_type_info = {
     .name = TYPE_PCI_BRIDGE,
     .parent = TYPE_PCI_DEVICE,
diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h
index ff7cbaa..c9f642c 100644
--- a/include/hw/pci/pci_bridge.h
+++ b/include/hw/pci/pci_bridge.h
@@ -67,4 +67,22 @@  void pci_bridge_map_irq(PCIBridge *br, const char* bus_name,
 #define  PCI_BRIDGE_CTL_DISCARD_STATUS	0x400	/* Discard timer status */
 #define  PCI_BRIDGE_CTL_DISCARD_SERR	0x800	/* Discard timer SERR# enable */
 
+typedef struct PCIBridgeQemuCap {
+    uint8_t id;     /* Standard PCI capability header field */
+    uint8_t next;   /* Standard PCI capability header field */
+    uint8_t len;    /* Standard PCI vendor-specific capability header field */
+    uint8_t bus_res;
+    uint32_t pref_lim_upper;
+    uint16_t pref_lim;
+    uint16_t mem_lim;
+    uint16_t io_lim_upper;
+    uint8_t io_lim;
+    uint8_t padding;
+} PCIBridgeQemuCap;
+
+int pci_bridge_help_cap_init(PCIDevice *dev, int cap_offset,
+                              uint8_t bus_reserve, uint32_t io_limit,
+                              uint16_t mem_limit, uint64_t pref_limit,
+                              Error **errp);
+
 #endif /* QEMU_PCI_BRIDGE_H */