diff mbox

[v3,5/7] vmxnet3: The vmxnet3 device is a PCIE endpoint

Message ID 1449921644-31673-6-git-send-email-shmulik.ladkani@ravellosystems.com
State New
Headers show

Commit Message

Shmulik Ladkani Dec. 12, 2015, noon UTC
Report the 'express endpoint' capability if on a PCIE bus.

Signed-off-by: Shmulik Ladkani <shmulik.ladkani@ravellosystems.com>
---
 hw/net/vmxnet3.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 52 insertions(+), 1 deletion(-)

Comments

Jason Wang Dec. 14, 2015, 6:24 a.m. UTC | #1
On 12/12/2015 08:00 PM, Shmulik Ladkani wrote:
> Report the 'express endpoint' capability if on a PCIE bus.
>
> Signed-off-by: Shmulik Ladkani <shmulik.ladkani@ravellosystems.com>
> ---
>  hw/net/vmxnet3.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 52 insertions(+), 1 deletion(-)
>
> diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
> index 14d4dcb..7ded287 100644
> --- a/hw/net/vmxnet3.c
> +++ b/hw/net/vmxnet3.c
> @@ -40,7 +40,11 @@
>  #define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT 0
>  #define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS \
>      (1 << VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT)
> +#define VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT 1
> +#define VMXNET3_COMPAT_FLAG_DISABLE_PCIE \
> +    (1 << VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT)
>  
> +#define VMXNET3_EXP_EP_OFFSET (0x48)
>  #define VMXNET3_MSI_OFFSET(s) \
>      ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0x50 : 0x84)
>  #define VMXNET3_MSIX_OFFSET(s) \
> @@ -121,6 +125,7 @@
>  
>  typedef struct VMXNET3Class {
>      PCIDeviceClass parent_class;
> +    DeviceRealize parent_dc_realize;
>  } VMXNET3Class;
>  
>  #define TYPE_VMXNET3 "vmxnet3"
> @@ -2256,6 +2261,10 @@ static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp)
>  
>      vmxnet3_net_init(s);
>  
> +    if (pci_is_express(pci_dev) && pci_bus_is_express(pci_dev->bus)) {
> +        pcie_endpoint_cap_init(pci_dev, VMXNET3_EXP_EP_OFFSET);
> +    }
> +
>      register_savevm(dev, "vmxnet3-msix", -1, 1,
>                      vmxnet3_msix_save, vmxnet3_msix_load, s);
>  }
> @@ -2525,6 +2534,29 @@ static const VMStateInfo int_state_info = {
>      .put = vmxnet3_put_int_state
>  };
>  
> +static bool vmxnet3_vmstate_need_pcie_device(void *opaque)
> +{
> +    VMXNET3State *s = VMXNET3(opaque);
> +
> +    return !(s->compat_flags & VMXNET3_COMPAT_FLAG_DISABLE_PCIE);
> +}
> +
> +static bool vmxnet3_vmstate_test_pci_device(void *opaque, int version_id)
> +{
> +    return !vmxnet3_vmstate_need_pcie_device(opaque);
> +}
> +
> +static const VMStateDescription vmstate_vmxnet3_pcie_device = {
> +    .name = "vmxnet3/pcie",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .needed = vmxnet3_vmstate_need_pcie_device,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_PCIE_DEVICE(parent_obj, VMXNET3State),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
>  static const VMStateDescription vmstate_vmxnet3 = {
>      .name = "vmxnet3",
>      .version_id = 1,
> @@ -2532,7 +2564,9 @@ static const VMStateDescription vmstate_vmxnet3 = {
>      .pre_save = vmxnet3_pre_save,
>      .post_load = vmxnet3_post_load,
>      .fields = (VMStateField[]) {
> -            VMSTATE_PCI_DEVICE(parent_obj, VMXNET3State),
> +            VMSTATE_STRUCT_TEST(parent_obj, VMXNET3State,
> +                                vmxnet3_vmstate_test_pci_device, 0,
> +                                vmstate_pci_device, PCIDevice),
>              VMSTATE_BOOL(rx_packets_compound, VMXNET3State),
>              VMSTATE_BOOL(rx_vlan_stripping, VMXNET3State),
>              VMSTATE_BOOL(lro_supported, VMXNET3State),
> @@ -2567,6 +2601,7 @@ static const VMStateDescription vmstate_vmxnet3 = {
>      },
>      .subsections = (const VMStateDescription*[]) {
>          &vmxstate_vmxnet3_mcast_list,
> +        &vmstate_vmxnet3_pcie_device,
>          NULL
>      }
>  };
> @@ -2578,10 +2613,24 @@ static Property vmxnet3_properties[] = {
>      DEFINE_PROP_END_OF_LIST(),
>  };
>  
> +static void vmxnet3_realize(DeviceState *qdev, Error **errp)
> +{
> +    VMXNET3Class *vc = VMXNET3_DEVICE_GET_CLASS(qdev);
> +    PCIDevice *pci_dev = PCI_DEVICE(qdev);
> +    VMXNET3State *s = VMXNET3(qdev);
> +
> +    if (!(s->compat_flags & VMXNET3_COMPAT_FLAG_DISABLE_PCIE)) {
> +        pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
> +    }

Looking at the other pci express device implementation (e.g nvme). Looks
like we can re-use the "is_express" property of PCIDeviceClass by
setting this to true in vmxnet3_class_init(). If this is ok, there's
probably no need for hacking like this.

> +
> +    vc->parent_dc_realize(qdev, errp);
> +}
> +
>  static void vmxnet3_class_init(ObjectClass *class, void *data)
>  {
>      DeviceClass *dc = DEVICE_CLASS(class);
>      PCIDeviceClass *c = PCI_DEVICE_CLASS(class);
> +    VMXNET3Class *vc = VMXNET3_DEVICE_CLASS(class);
>  
>      c->realize = vmxnet3_pci_realize;
>      c->exit = vmxnet3_pci_uninit;
> @@ -2591,6 +2640,8 @@ static void vmxnet3_class_init(ObjectClass *class, void *data)
>      c->class_id = PCI_CLASS_NETWORK_ETHERNET;
>      c->subsystem_vendor_id = PCI_VENDOR_ID_VMWARE;
>      c->subsystem_id = PCI_DEVICE_ID_VMWARE_VMXNET3;
> +    vc->parent_dc_realize = dc->realize;
> +    dc->realize = vmxnet3_realize;
>      dc->desc = "VMWare Paravirtualized Ethernet v3";
>      dc->reset = vmxnet3_qdev_reset;
>      dc->vmsd = &vmstate_vmxnet3;
Shmulik Ladkani Dec. 14, 2015, 7:32 a.m. UTC | #2
Thanks Jason,

On Mon, 14 Dec 2015 14:24:36 +0800, jasowang@redhat.com wrote:
> > +static void vmxnet3_realize(DeviceState *qdev, Error **errp)
> > +{
> > +    VMXNET3Class *vc = VMXNET3_DEVICE_GET_CLASS(qdev);
> > +    PCIDevice *pci_dev = PCI_DEVICE(qdev);
> > +    VMXNET3State *s = VMXNET3(qdev);
> > +
> > +    if (!(s->compat_flags & VMXNET3_COMPAT_FLAG_DISABLE_PCIE)) {
> > +        pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
> > +    }
> 
> Looking at the other pci express device implementation (e.g nvme). Looks
> like we can re-use the "is_express" property of PCIDeviceClass by
> setting this to true in vmxnet3_class_init(). If this is ok, there's
> probably no need for hacking like this.

Yes, arming PCIDeviceClass.is_express in a 'class_init' method is the
classic way instructing the pci device to get the QEMU_PCI_CAP_EXPRESS
flag.
But this only works when 'is_express' is unconditionally true for all
instances of the class.

However in our case, we need to set it conditionally per instance,
according to the 'x-disable-pcie' device property.

My first attempt was setting QEMU_PCI_CAP_EXPRESS in
'vmxnet3_instance_init', which seems much cleaner (no need to introduce
the 'parent_dc_realize' hack).

This attempt failed, since at time of 'instance_init' invocation, the
device properties are NOT yet processed, so the 'compat_flags' isn't
set with the right value:

qdev_device_add()
   dev = DEVICE(object_new(driver));  # device creation
     object_new_with_type()
       object_initialize_with_type()
         object_init_with_type()
           ti->instance_init(obj);  # instance_init invoked at device creation
   ...
   qemu_opt_foreach(opts, set_property, dev, &err)  # sets properties
   ...

An alternative to introducing 'parent_dc_realize' was direct invocation
of 'pci_qdev_realize' within 'vmxnet3_realize', as I suggested in [1].
However this was rejected by Marcel Apfelbaum, as this might be error
prone since subclass (TYPE_VMXNET3) should have no direct knowledge
about its parent class's (TYPE_PCI_DEVICE) realize method, see [2].

Another attempt I've made is to indroduce a new type vmxnet3e (the
pcie variant of vmxnet3).
I dropped this approach since it was way too cumbersome, introducing
lots of boiler-plate code for the two (otherwise) identical types.

Since virtio-pci device needed the same fix (making it pcie without
breaking compat), and since the above approach was chosen
 (0560b0e virtio-pci: Set the QEMU_PCI_CAP_EXPRESS capability early in its DeviceClass realize method)
I've repeated same approach for vmxnet3.

I'm open to other suggestions, if we can come up with something cleaner
that fits all requirements.

Regards,
Shmulik

[1]
https://lists.nongnu.org/archive/html/qemu-devel/2015-12/msg00043.html

[2]
https://lists.nongnu.org/archive/html/qemu-devel/2015-12/msg00114.html
Jason Wang Dec. 15, 2015, 2:35 a.m. UTC | #3
On 12/14/2015 03:32 PM, Shmulik Ladkani wrote:
> Thanks Jason,
>
> On Mon, 14 Dec 2015 14:24:36 +0800, jasowang@redhat.com wrote:
>>> +static void vmxnet3_realize(DeviceState *qdev, Error **errp)
>>> +{
>>> +    VMXNET3Class *vc = VMXNET3_DEVICE_GET_CLASS(qdev);
>>> +    PCIDevice *pci_dev = PCI_DEVICE(qdev);
>>> +    VMXNET3State *s = VMXNET3(qdev);
>>> +
>>> +    if (!(s->compat_flags & VMXNET3_COMPAT_FLAG_DISABLE_PCIE)) {
>>> +        pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
>>> +    }
>> Looking at the other pci express device implementation (e.g nvme). Looks
>> like we can re-use the "is_express" property of PCIDeviceClass by
>> setting this to true in vmxnet3_class_init(). If this is ok, there's
>> probably no need for hacking like this.
> Yes, arming PCIDeviceClass.is_express in a 'class_init' method is the
> classic way instructing the pci device to get the QEMU_PCI_CAP_EXPRESS
> flag.
> But this only works when 'is_express' is unconditionally true for all
> instances of the class.

I see.

>
> However in our case, we need to set it conditionally per instance,
> according to the 'x-disable-pcie' device property.
>
> My first attempt was setting QEMU_PCI_CAP_EXPRESS in
> 'vmxnet3_instance_init', which seems much cleaner (no need to introduce
> the 'parent_dc_realize' hack).
>
> This attempt failed, since at time of 'instance_init' invocation, the
> device properties are NOT yet processed, so the 'compat_flags' isn't
> set with the right value:
>
> qdev_device_add()
>    dev = DEVICE(object_new(driver));  # device creation
>      object_new_with_type()
>        object_initialize_with_type()
>          object_init_with_type()
>            ti->instance_init(obj);  # instance_init invoked at device creation
>    ...
>    qemu_opt_foreach(opts, set_property, dev, &err)  # sets properties
>    ...
>
> An alternative to introducing 'parent_dc_realize' was direct invocation
> of 'pci_qdev_realize' within 'vmxnet3_realize', as I suggested in [1].
> However this was rejected by Marcel Apfelbaum, as this might be error
> prone since subclass (TYPE_VMXNET3) should have no direct knowledge
> about its parent class's (TYPE_PCI_DEVICE) realize method, see [2].
>
> Another attempt I've made is to indroduce a new type vmxnet3e (the
> pcie variant of vmxnet3).
> I dropped this approach since it was way too cumbersome, introducing
> lots of boiler-plate code for the two (otherwise) identical types.

Yes, that's another solution (as I replied for patch 6). A question
here. If vmware differs pci-e version of vmxnet3 from pci version,
probably we need do the same (and you don't even need to care for
compatibility in the case). At a quick glance, no much duplicated codes.
(if you mean the msi offsets, you can let vmxnet3e use the new offset
unconditionally).

>
> Since virtio-pci device needed the same fix (making it pcie without
> breaking compat), and since the above approach was chosen
>  (0560b0e virtio-pci: Set the QEMU_PCI_CAP_EXPRESS capability early in its DeviceClass realize method)
> I've repeated same approach for vmxnet3.
>
> I'm open to other suggestions, if we can come up with something cleaner
> that fits all requirements.
>
> Regards,
> Shmulik
>
> [1]
> https://lists.nongnu.org/archive/html/qemu-devel/2015-12/msg00043.html
>
> [2]
> https://lists.nongnu.org/archive/html/qemu-devel/2015-12/msg00114.html
Shmulik Ladkani Dec. 15, 2015, 6:09 a.m. UTC | #4
Hi Jason,

On Tue, 15 Dec 2015 10:35:59 +0800 Jason Wang <jasowang@redhat.com> wrote:
> > Another attempt I've made is to indroduce a new type vmxnet3e (the
> > pcie variant of vmxnet3).
> > I dropped this approach since it was way too cumbersome, introducing
> > lots of boiler-plate code for the two (otherwise) identical types.
> 
> Yes, that's another solution (as I replied for patch 6). A question
> here. If vmware differs pci-e version of vmxnet3 from pci version,
> probably we need do the same (and you don't even need to care for
> compatibility in the case). At a quick glance, no much duplicated codes.
> (if you mean the msi offsets, you can let vmxnet3e use the new offset
> unconditionally).

Examples of duplicated boiler plate:

Split to a TYPE_VMXNET3_BASE abstract type having two concrete sub types.

Introduction of 'VMStateDescription vmstate_vmxnet3e' which differs only
due to its '.name' (must be the name of the type, i.e "vmxnet3e") and
the use of VMSTATE_PCIE_DEVICE (instead of VMSTATE_PCI_DEVICE), but
otherwise idential to existing 'VMStateDescription vmstate_vmxnet3'.

Introduction of 'VMStateDescription vmxstate_vmxnet3e_mcast_list' which
differs only by '.name' (must be "vmxnet3e/mcast_list" instead of
"vmxnet3/mcast_list") but otherwise identical to existing
'vmxstate_vmxnet3_mcast_list'.

Also, the vmxnet3 device is indeed a PCIE, and should have been so since
start.
The reason we're keeping the non-pcie variant is not since user would be
interested in an environment having the the non-pcie type, but only for
not breaking migration from old hardware versions.

Thus, suggesting 2 device types, providing the non-pcie variant as a
user visible type, exposes the user with a choice of selecting a type
which ideally shouldn't have existed at all.
This seems less preferrable.

Regards,
Shmulik
Jason Wang Dec. 15, 2015, 8:12 a.m. UTC | #5
On 12/15/2015 02:09 PM, Shmulik Ladkani wrote:
> Hi Jason,
>
> On Tue, 15 Dec 2015 10:35:59 +0800 Jason Wang <jasowang@redhat.com> wrote:
>>> Another attempt I've made is to indroduce a new type vmxnet3e (the
>>> pcie variant of vmxnet3).
>>> I dropped this approach since it was way too cumbersome, introducing
>>> lots of boiler-plate code for the two (otherwise) identical types.
>> Yes, that's another solution (as I replied for patch 6). A question
>> here. If vmware differs pci-e version of vmxnet3 from pci version,
>> probably we need do the same (and you don't even need to care for
>> compatibility in the case). At a quick glance, no much duplicated codes.
>> (if you mean the msi offsets, you can let vmxnet3e use the new offset
>> unconditionally).
> Examples of duplicated boiler plate:
>
> Split to a TYPE_VMXNET3_BASE abstract type having two concrete sub types.
>
> Introduction of 'VMStateDescription vmstate_vmxnet3e' which differs only
> due to its '.name' (must be the name of the type, i.e "vmxnet3e") and
> the use of VMSTATE_PCIE_DEVICE (instead of VMSTATE_PCI_DEVICE), but
> otherwise idential to existing 'VMStateDescription vmstate_vmxnet3'.
>
> Introduction of 'VMStateDescription vmxstate_vmxnet3e_mcast_list' which
> differs only by '.name' (must be "vmxnet3e/mcast_list" instead of
> "vmxnet3/mcast_list") but otherwise identical to existing
> 'vmxstate_vmxnet3_mcast_list'.
>
> Also, the vmxnet3 device is indeed a PCIE, and should have been so since
> start.

Yes, so this is a strong reason that we must not introduce a new type.

> The reason we're keeping the non-pcie variant is not since user would be
> interested in an environment having the the non-pcie type, but only for
> not breaking migration from old hardware versions.
>
> Thus, suggesting 2 device types, providing the non-pcie variant as a
> user visible type, exposes the user with a choice of selecting a type
> which ideally shouldn't have existed at all.
> This seems less preferrable.
>
> Regards,
> Shmulik
>

I get the point, thanks for the clarification.
diff mbox

Patch

diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 14d4dcb..7ded287 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -40,7 +40,11 @@ 
 #define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT 0
 #define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS \
     (1 << VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT)
+#define VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT 1
+#define VMXNET3_COMPAT_FLAG_DISABLE_PCIE \
+    (1 << VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT)
 
+#define VMXNET3_EXP_EP_OFFSET (0x48)
 #define VMXNET3_MSI_OFFSET(s) \
     ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0x50 : 0x84)
 #define VMXNET3_MSIX_OFFSET(s) \
@@ -121,6 +125,7 @@ 
 
 typedef struct VMXNET3Class {
     PCIDeviceClass parent_class;
+    DeviceRealize parent_dc_realize;
 } VMXNET3Class;
 
 #define TYPE_VMXNET3 "vmxnet3"
@@ -2256,6 +2261,10 @@  static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp)
 
     vmxnet3_net_init(s);
 
+    if (pci_is_express(pci_dev) && pci_bus_is_express(pci_dev->bus)) {
+        pcie_endpoint_cap_init(pci_dev, VMXNET3_EXP_EP_OFFSET);
+    }
+
     register_savevm(dev, "vmxnet3-msix", -1, 1,
                     vmxnet3_msix_save, vmxnet3_msix_load, s);
 }
@@ -2525,6 +2534,29 @@  static const VMStateInfo int_state_info = {
     .put = vmxnet3_put_int_state
 };
 
+static bool vmxnet3_vmstate_need_pcie_device(void *opaque)
+{
+    VMXNET3State *s = VMXNET3(opaque);
+
+    return !(s->compat_flags & VMXNET3_COMPAT_FLAG_DISABLE_PCIE);
+}
+
+static bool vmxnet3_vmstate_test_pci_device(void *opaque, int version_id)
+{
+    return !vmxnet3_vmstate_need_pcie_device(opaque);
+}
+
+static const VMStateDescription vmstate_vmxnet3_pcie_device = {
+    .name = "vmxnet3/pcie",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = vmxnet3_vmstate_need_pcie_device,
+    .fields = (VMStateField[]) {
+        VMSTATE_PCIE_DEVICE(parent_obj, VMXNET3State),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static const VMStateDescription vmstate_vmxnet3 = {
     .name = "vmxnet3",
     .version_id = 1,
@@ -2532,7 +2564,9 @@  static const VMStateDescription vmstate_vmxnet3 = {
     .pre_save = vmxnet3_pre_save,
     .post_load = vmxnet3_post_load,
     .fields = (VMStateField[]) {
-            VMSTATE_PCI_DEVICE(parent_obj, VMXNET3State),
+            VMSTATE_STRUCT_TEST(parent_obj, VMXNET3State,
+                                vmxnet3_vmstate_test_pci_device, 0,
+                                vmstate_pci_device, PCIDevice),
             VMSTATE_BOOL(rx_packets_compound, VMXNET3State),
             VMSTATE_BOOL(rx_vlan_stripping, VMXNET3State),
             VMSTATE_BOOL(lro_supported, VMXNET3State),
@@ -2567,6 +2601,7 @@  static const VMStateDescription vmstate_vmxnet3 = {
     },
     .subsections = (const VMStateDescription*[]) {
         &vmxstate_vmxnet3_mcast_list,
+        &vmstate_vmxnet3_pcie_device,
         NULL
     }
 };
@@ -2578,10 +2613,24 @@  static Property vmxnet3_properties[] = {
     DEFINE_PROP_END_OF_LIST(),
 };
 
+static void vmxnet3_realize(DeviceState *qdev, Error **errp)
+{
+    VMXNET3Class *vc = VMXNET3_DEVICE_GET_CLASS(qdev);
+    PCIDevice *pci_dev = PCI_DEVICE(qdev);
+    VMXNET3State *s = VMXNET3(qdev);
+
+    if (!(s->compat_flags & VMXNET3_COMPAT_FLAG_DISABLE_PCIE)) {
+        pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
+    }
+
+    vc->parent_dc_realize(qdev, errp);
+}
+
 static void vmxnet3_class_init(ObjectClass *class, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(class);
     PCIDeviceClass *c = PCI_DEVICE_CLASS(class);
+    VMXNET3Class *vc = VMXNET3_DEVICE_CLASS(class);
 
     c->realize = vmxnet3_pci_realize;
     c->exit = vmxnet3_pci_uninit;
@@ -2591,6 +2640,8 @@  static void vmxnet3_class_init(ObjectClass *class, void *data)
     c->class_id = PCI_CLASS_NETWORK_ETHERNET;
     c->subsystem_vendor_id = PCI_VENDOR_ID_VMWARE;
     c->subsystem_id = PCI_DEVICE_ID_VMWARE_VMXNET3;
+    vc->parent_dc_realize = dc->realize;
+    dc->realize = vmxnet3_realize;
     dc->desc = "VMWare Paravirtualized Ethernet v3";
     dc->reset = vmxnet3_qdev_reset;
     dc->vmsd = &vmstate_vmxnet3;