diff mbox

virtio-pci: Fix cross-version migration with older machines

Message ID 20161214125237.20850-1-maxime.coquelin@redhat.com
State New
Headers show

Commit Message

Maxime Coquelin Dec. 14, 2016, 12:52 p.m. UTC
This patch fixes a cross-version migration regression introduced
by commit d1b4259f ("virtio-bus: Plug devices after features are
negotiated").

The problem is encountered when host's vhost backend does not support
VIRTIO_F_VERSION_1, and migration is initiated from a v2.7 or prior
machine with virtio-pci modern capabilities enabled to a v2.8 machine.

In this case, modern capabilities get exposed to the guest by the source,
whereas the target will detect version 1 is not supported so will only
expose legacy capabilities.

The problem is fixed by introducing a new "x-modern-broken" property,
which is set in v2.7 and prior compatibility modes. Doing this, v2.7
machine keeps its broken behaviour (enabling modern while version is
not supported), and newer machines will behave correctly.

Reported-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Suggested-by: Stefan Hajnoczi <stefanha@redhat.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
Cc: Marcel Apfelbaum <marcel@redhat.com>
Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---

I'm not sure about the property name, let me know if you have better ideas.
I didn't tested migration yet, but I wanted to share the patch while I test it.
I tested booting v2.8 and v2.7 machines with !VERSION_1 and get expected result:
 - v2.8: Virtio-pci probe succeed
 - v2.7: Virtio-pci probe fails

Thanks,
Maxime

 hw/virtio/virtio-pci.c | 4 +++-
 hw/virtio/virtio-pci.h | 1 +
 include/hw/compat.h    | 4 ++++
 3 files changed, 8 insertions(+), 1 deletion(-)

Comments

Cornelia Huck Dec. 14, 2016, 1:12 p.m. UTC | #1
On Wed, 14 Dec 2016 13:52:37 +0100
Maxime Coquelin <maxime.coquelin@redhat.com> wrote:

> This patch fixes a cross-version migration regression introduced
> by commit d1b4259f ("virtio-bus: Plug devices after features are
> negotiated").
> 
> The problem is encountered when host's vhost backend does not support
> VIRTIO_F_VERSION_1, and migration is initiated from a v2.7 or prior
> machine with virtio-pci modern capabilities enabled to a v2.8 machine.

Wait, machine versions or qemu versions?

> 
> In this case, modern capabilities get exposed to the guest by the source,
> whereas the target will detect version 1 is not supported so will only
> expose legacy capabilities.
> 
> The problem is fixed by introducing a new "x-modern-broken" property,
> which is set in v2.7 and prior compatibility modes. Doing this, v2.7
> machine keeps its broken behaviour (enabling modern while version is
> not supported), and newer machines will behave correctly.
> 
> Reported-by: Michael Roth <mdroth@linux.vnet.ibm.com>
> Suggested-by: Stefan Hajnoczi <stefanha@redhat.com>
> Cc: Michael S. Tsirkin <mst@redhat.com>
> Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
> Cc: Marcel Apfelbaum <marcel@redhat.com>
> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
> 
> I'm not sure about the property name, let me know if you have better ideas.
> I didn't tested migration yet, but I wanted to share the patch while I test it.
> I tested booting v2.8 and v2.7 machines with !VERSION_1 and get expected result:
>  - v2.8: Virtio-pci probe succeed
>  - v2.7: Virtio-pci probe fails
> 
> Thanks,
> Maxime
> 
>  hw/virtio/virtio-pci.c | 4 +++-
>  hw/virtio/virtio-pci.h | 1 +
>  include/hw/compat.h    | 4 ++++
>  3 files changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index 521ba0b..93f6b54 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -1580,7 +1580,8 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
>       * Virtio capabilities present without
>       * VIRTIO_F_VERSION_1 confuses guests
>       */
> -    if (!virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) {
> +    if (!proxy->modern_broken &&

What you are de facto doing here is ignoring the features supported by
the backend. Call this ->ignore_backend_features or so?

> +            !virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) {
>          virtio_pci_disable_modern(proxy);
> 
>          if (!legacy) {
> @@ -1852,6 +1853,7 @@ static Property virtio_pci_properties[] = {
>                      VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, false),
>      DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags,
>                      VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false),
> +    DEFINE_PROP_BOOL("x-modern-broken", VirtIOPCIProxy, modern_broken, false),

What about "x-ignore-backend-features"?

>      DEFINE_PROP_END_OF_LIST(),
>  };
> 
> diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h
> index b2a996f..1dca223 100644
> --- a/hw/virtio/virtio-pci.h
> +++ b/hw/virtio/virtio-pci.h
> @@ -153,6 +153,7 @@ struct VirtIOPCIProxy {
>      int config_cap;
>      uint32_t flags;
>      bool disable_modern;
> +    bool modern_broken;
>      OnOffAuto disable_legacy;
>      uint32_t class_code;
>      uint32_t nvectors;
> diff --git a/include/hw/compat.h b/include/hw/compat.h
> index 0f06e11..fe11723 100644
> --- a/include/hw/compat.h
> +++ b/include/hw/compat.h
> @@ -18,6 +18,10 @@
>          .driver   = "intel-iommu",\
>          .property = "x-buggy-eim",\
>          .value    = "true",\
> +    },{\
> +        .driver   = "virtio-pci",\
> +        .property = "x-modern-broken",\
> +        .value    = "on",\

This unfortunately has the same problem wrt -global propagation as the
other virtio-pci compat props (unexpeced overrides). There's basically
zero chance, however, that somebody will want to set this property
manually (unline disable-legacy/disable-modern), and there's a patch
targeting 2.8.1, so I think we can live with it. (Alternatively, you
would need to set this property on all possible virtio-pci devices.)

>      },
> 
>  #define HW_COMPAT_2_6 \
Michael S. Tsirkin Dec. 14, 2016, 1:13 p.m. UTC | #2
On Wed, Dec 14, 2016 at 01:52:37PM +0100, Maxime Coquelin wrote:
> This patch fixes a cross-version migration regression introduced
> by commit d1b4259f ("virtio-bus: Plug devices after features are
> negotiated").
> 
> The problem is encountered when host's vhost backend does not support
> VIRTIO_F_VERSION_1, and migration is initiated from a v2.7 or prior
> machine with virtio-pci modern capabilities enabled to a v2.8 machine.
> 
> In this case, modern capabilities get exposed to the guest by the source,
> whereas the target will detect version 1 is not supported so will only
> expose legacy capabilities.
> 
> The problem is fixed by introducing a new "x-modern-broken" property,
> which is set in v2.7 and prior compatibility modes. Doing this, v2.7
> machine keeps its broken behaviour (enabling modern while version is
> not supported), and newer machines will behave correctly.
> 
> Reported-by: Michael Roth <mdroth@linux.vnet.ibm.com>
> Suggested-by: Stefan Hajnoczi <stefanha@redhat.com>
> Cc: Michael S. Tsirkin <mst@redhat.com>
> Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
> Cc: Marcel Apfelbaum <marcel@redhat.com>
> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>

I wouldn't call it modern broken though. Modern works fine.
What this does is allow modern capability in legacy mode.

x-virtio-legacy-with-modern-cap

?




> ---
> 
> I'm not sure about the property name, let me know if you have better ideas.
> I didn't tested migration yet, but I wanted to share the patch while I test it.
> I tested booting v2.8 and v2.7 machines with !VERSION_1 and get expected result:
>  - v2.8: Virtio-pci probe succeed
>  - v2.7: Virtio-pci probe fails
> 
> Thanks,
> Maxime
> 
>  hw/virtio/virtio-pci.c | 4 +++-
>  hw/virtio/virtio-pci.h | 1 +
>  include/hw/compat.h    | 4 ++++
>  3 files changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index 521ba0b..93f6b54 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -1580,7 +1580,8 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
>       * Virtio capabilities present without
>       * VIRTIO_F_VERSION_1 confuses guests
>       */
> -    if (!virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) {
> +    if (!proxy->modern_broken &&
> +            !virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) {
>          virtio_pci_disable_modern(proxy);
>  
>          if (!legacy) {
> @@ -1852,6 +1853,7 @@ static Property virtio_pci_properties[] = {
>                      VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, false),
>      DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags,
>                      VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false),
> +    DEFINE_PROP_BOOL("x-modern-broken", VirtIOPCIProxy, modern_broken, false),
>      DEFINE_PROP_END_OF_LIST(),
>  };
>  
> diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h
> index b2a996f..1dca223 100644
> --- a/hw/virtio/virtio-pci.h
> +++ b/hw/virtio/virtio-pci.h
> @@ -153,6 +153,7 @@ struct VirtIOPCIProxy {
>      int config_cap;
>      uint32_t flags;
>      bool disable_modern;
> +    bool modern_broken;
>      OnOffAuto disable_legacy;
>      uint32_t class_code;
>      uint32_t nvectors;
> diff --git a/include/hw/compat.h b/include/hw/compat.h
> index 0f06e11..fe11723 100644
> --- a/include/hw/compat.h
> +++ b/include/hw/compat.h
> @@ -18,6 +18,10 @@
>          .driver   = "intel-iommu",\
>          .property = "x-buggy-eim",\
>          .value    = "true",\
> +    },{\
> +        .driver   = "virtio-pci",\
> +        .property = "x-modern-broken",\
> +        .value    = "on",\
>      },
>  
>  #define HW_COMPAT_2_6 \
> -- 
> 2.9.3
Michael S. Tsirkin Dec. 14, 2016, 1:16 p.m. UTC | #3
On Wed, Dec 14, 2016 at 02:12:10PM +0100, Cornelia Huck wrote:
> On Wed, 14 Dec 2016 13:52:37 +0100
> Maxime Coquelin <maxime.coquelin@redhat.com> wrote:
> 
> > This patch fixes a cross-version migration regression introduced
> > by commit d1b4259f ("virtio-bus: Plug devices after features are
> > negotiated").
> > 
> > The problem is encountered when host's vhost backend does not support
> > VIRTIO_F_VERSION_1, and migration is initiated from a v2.7 or prior
> > machine with virtio-pci modern capabilities enabled to a v2.8 machine.
> 
> Wait, machine versions or qemu versions?
> 
> > 
> > In this case, modern capabilities get exposed to the guest by the source,
> > whereas the target will detect version 1 is not supported so will only
> > expose legacy capabilities.
> > 
> > The problem is fixed by introducing a new "x-modern-broken" property,
> > which is set in v2.7 and prior compatibility modes. Doing this, v2.7
> > machine keeps its broken behaviour (enabling modern while version is
> > not supported), and newer machines will behave correctly.
> > 
> > Reported-by: Michael Roth <mdroth@linux.vnet.ibm.com>
> > Suggested-by: Stefan Hajnoczi <stefanha@redhat.com>
> > Cc: Michael S. Tsirkin <mst@redhat.com>
> > Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
> > Cc: Marcel Apfelbaum <marcel@redhat.com>
> > Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> > ---
> > 
> > I'm not sure about the property name, let me know if you have better ideas.
> > I didn't tested migration yet, but I wanted to share the patch while I test it.
> > I tested booting v2.8 and v2.7 machines with !VERSION_1 and get expected result:
> >  - v2.8: Virtio-pci probe succeed
> >  - v2.7: Virtio-pci probe fails
> > 
> > Thanks,
> > Maxime
> > 
> >  hw/virtio/virtio-pci.c | 4 +++-
> >  hw/virtio/virtio-pci.h | 1 +
> >  include/hw/compat.h    | 4 ++++
> >  3 files changed, 8 insertions(+), 1 deletion(-)
> > 
> > diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> > index 521ba0b..93f6b54 100644
> > --- a/hw/virtio/virtio-pci.c
> > +++ b/hw/virtio/virtio-pci.c
> > @@ -1580,7 +1580,8 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
> >       * Virtio capabilities present without
> >       * VIRTIO_F_VERSION_1 confuses guests
> >       */
> > -    if (!virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) {
> > +    if (!proxy->modern_broken &&
> 
> What you are de facto doing here is ignoring the features supported by
> the backend. Call this ->ignore_backend_features or so?
> 
> > +            !virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) {
> >          virtio_pci_disable_modern(proxy);
> > 
> >          if (!legacy) {
> > @@ -1852,6 +1853,7 @@ static Property virtio_pci_properties[] = {
> >                      VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, false),
> >      DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags,
> >                      VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false),
> > +    DEFINE_PROP_BOOL("x-modern-broken", VirtIOPCIProxy, modern_broken, false),
> 
> What about "x-ignore-backend-features"?

It's just the capability that ignores that backend is legacy.

x-cap-ignore-backend-legacy

?

> >      DEFINE_PROP_END_OF_LIST(),
> >  };
> > 
> > diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h
> > index b2a996f..1dca223 100644
> > --- a/hw/virtio/virtio-pci.h
> > +++ b/hw/virtio/virtio-pci.h
> > @@ -153,6 +153,7 @@ struct VirtIOPCIProxy {
> >      int config_cap;
> >      uint32_t flags;
> >      bool disable_modern;
> > +    bool modern_broken;
> >      OnOffAuto disable_legacy;
> >      uint32_t class_code;
> >      uint32_t nvectors;
> > diff --git a/include/hw/compat.h b/include/hw/compat.h
> > index 0f06e11..fe11723 100644
> > --- a/include/hw/compat.h
> > +++ b/include/hw/compat.h
> > @@ -18,6 +18,10 @@
> >          .driver   = "intel-iommu",\
> >          .property = "x-buggy-eim",\
> >          .value    = "true",\
> > +    },{\
> > +        .driver   = "virtio-pci",\
> > +        .property = "x-modern-broken",\
> > +        .value    = "on",\
> 
> This unfortunately has the same problem wrt -global propagation as the
> other virtio-pci compat props (unexpeced overrides). There's basically
> zero chance, however, that somebody will want to set this property
> manually (unline disable-legacy/disable-modern), and there's a patch
> targeting 2.8.1, so I think we can live with it. (Alternatively, you
> would need to set this property on all possible virtio-pci devices.)

YEs.

> >      },
> > 
> >  #define HW_COMPAT_2_6 \
Cornelia Huck Dec. 14, 2016, 1:21 p.m. UTC | #4
On Wed, 14 Dec 2016 15:16:13 +0200
"Michael S. Tsirkin" <mst@redhat.com> wrote:

> On Wed, Dec 14, 2016 at 02:12:10PM +0100, Cornelia Huck wrote:
> > On Wed, 14 Dec 2016 13:52:37 +0100
> > Maxime Coquelin <maxime.coquelin@redhat.com> wrote:
> > 
> > > This patch fixes a cross-version migration regression introduced
> > > by commit d1b4259f ("virtio-bus: Plug devices after features are
> > > negotiated").
> > > 
> > > The problem is encountered when host's vhost backend does not support
> > > VIRTIO_F_VERSION_1, and migration is initiated from a v2.7 or prior
> > > machine with virtio-pci modern capabilities enabled to a v2.8 machine.
> > 
> > Wait, machine versions or qemu versions?
> > 
> > > 
> > > In this case, modern capabilities get exposed to the guest by the source,
> > > whereas the target will detect version 1 is not supported so will only
> > > expose legacy capabilities.
> > > 
> > > The problem is fixed by introducing a new "x-modern-broken" property,
> > > which is set in v2.7 and prior compatibility modes. Doing this, v2.7
> > > machine keeps its broken behaviour (enabling modern while version is
> > > not supported), and newer machines will behave correctly.
> > > 
> > > Reported-by: Michael Roth <mdroth@linux.vnet.ibm.com>
> > > Suggested-by: Stefan Hajnoczi <stefanha@redhat.com>
> > > Cc: Michael S. Tsirkin <mst@redhat.com>
> > > Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
> > > Cc: Marcel Apfelbaum <marcel@redhat.com>
> > > Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > > Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> > > ---
> > > 
> > > I'm not sure about the property name, let me know if you have better ideas.
> > > I didn't tested migration yet, but I wanted to share the patch while I test it.
> > > I tested booting v2.8 and v2.7 machines with !VERSION_1 and get expected result:
> > >  - v2.8: Virtio-pci probe succeed
> > >  - v2.7: Virtio-pci probe fails
> > > 
> > > Thanks,
> > > Maxime
> > > 
> > >  hw/virtio/virtio-pci.c | 4 +++-
> > >  hw/virtio/virtio-pci.h | 1 +
> > >  include/hw/compat.h    | 4 ++++
> > >  3 files changed, 8 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> > > index 521ba0b..93f6b54 100644
> > > --- a/hw/virtio/virtio-pci.c
> > > +++ b/hw/virtio/virtio-pci.c
> > > @@ -1580,7 +1580,8 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
> > >       * Virtio capabilities present without
> > >       * VIRTIO_F_VERSION_1 confuses guests
> > >       */
> > > -    if (!virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) {
> > > +    if (!proxy->modern_broken &&
> > 
> > What you are de facto doing here is ignoring the features supported by
> > the backend. Call this ->ignore_backend_features or so?
> > 
> > > +            !virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) {
> > >          virtio_pci_disable_modern(proxy);
> > > 
> > >          if (!legacy) {
> > > @@ -1852,6 +1853,7 @@ static Property virtio_pci_properties[] = {
> > >                      VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, false),
> > >      DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags,
> > >                      VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false),
> > > +    DEFINE_PROP_BOOL("x-modern-broken", VirtIOPCIProxy, modern_broken, false),
> > 
> > What about "x-ignore-backend-features"?
> 
> It's just the capability that ignores that backend is legacy.
> 
> x-cap-ignore-backend-legacy
> 
> ?

Sounds good. And has the benefit that nobody will be tempted to set
that manually :)
Maxime Coquelin Dec. 14, 2016, 1:29 p.m. UTC | #5
On 12/14/2016 02:21 PM, Cornelia Huck wrote:
> On Wed, 14 Dec 2016 15:16:13 +0200
> "Michael S. Tsirkin" <mst@redhat.com> wrote:
>
>> On Wed, Dec 14, 2016 at 02:12:10PM +0100, Cornelia Huck wrote:
>>> On Wed, 14 Dec 2016 13:52:37 +0100
>>> Maxime Coquelin <maxime.coquelin@redhat.com> wrote:
>>>
>>>> This patch fixes a cross-version migration regression introduced
>>>> by commit d1b4259f ("virtio-bus: Plug devices after features are
>>>> negotiated").
>>>>
>>>> The problem is encountered when host's vhost backend does not support
>>>> VIRTIO_F_VERSION_1, and migration is initiated from a v2.7 or prior
>>>> machine with virtio-pci modern capabilities enabled to a v2.8 machine.
>>>
>>> Wait, machine versions or qemu versions?
>>>
>>>>
>>>> In this case, modern capabilities get exposed to the guest by the source,
>>>> whereas the target will detect version 1 is not supported so will only
>>>> expose legacy capabilities.
>>>>
>>>> The problem is fixed by introducing a new "x-modern-broken" property,
>>>> which is set in v2.7 and prior compatibility modes. Doing this, v2.7
>>>> machine keeps its broken behaviour (enabling modern while version is
>>>> not supported), and newer machines will behave correctly.
>>>>
>>>> Reported-by: Michael Roth <mdroth@linux.vnet.ibm.com>
>>>> Suggested-by: Stefan Hajnoczi <stefanha@redhat.com>
>>>> Cc: Michael S. Tsirkin <mst@redhat.com>
>>>> Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
>>>> Cc: Marcel Apfelbaum <marcel@redhat.com>
>>>> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
>>>> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
>>>> ---
>>>>
>>>> I'm not sure about the property name, let me know if you have better ideas.
>>>> I didn't tested migration yet, but I wanted to share the patch while I test it.
>>>> I tested booting v2.8 and v2.7 machines with !VERSION_1 and get expected result:
>>>>  - v2.8: Virtio-pci probe succeed
>>>>  - v2.7: Virtio-pci probe fails
>>>>
>>>> Thanks,
>>>> Maxime
>>>>
>>>>  hw/virtio/virtio-pci.c | 4 +++-
>>>>  hw/virtio/virtio-pci.h | 1 +
>>>>  include/hw/compat.h    | 4 ++++
>>>>  3 files changed, 8 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
>>>> index 521ba0b..93f6b54 100644
>>>> --- a/hw/virtio/virtio-pci.c
>>>> +++ b/hw/virtio/virtio-pci.c
>>>> @@ -1580,7 +1580,8 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
>>>>       * Virtio capabilities present without
>>>>       * VIRTIO_F_VERSION_1 confuses guests
>>>>       */
>>>> -    if (!virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) {
>>>> +    if (!proxy->modern_broken &&
>>>
>>> What you are de facto doing here is ignoring the features supported by
>>> the backend. Call this ->ignore_backend_features or so?
>>>
>>>> +            !virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) {
>>>>          virtio_pci_disable_modern(proxy);
>>>>
>>>>          if (!legacy) {
>>>> @@ -1852,6 +1853,7 @@ static Property virtio_pci_properties[] = {
>>>>                      VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, false),
>>>>      DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags,
>>>>                      VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false),
>>>> +    DEFINE_PROP_BOOL("x-modern-broken", VirtIOPCIProxy, modern_broken, false),
>>>
>>> What about "x-ignore-backend-features"?
>>
>> It's just the capability that ignores that backend is legacy.
>>
>> x-cap-ignore-backend-legacy
>>
>> ?
>
> Sounds good. And has the benefit that nobody will be tempted to set
> that manually :)

Thanks Michael & Cornelia, you're definitely better than me at naming
things :)

I'll send the v2 using this nameing as soon as I get migration case
tested.

  - Maxime
Marcel Apfelbaum Dec. 14, 2016, 1:31 p.m. UTC | #6
On 12/14/2016 02:52 PM, Maxime Coquelin wrote:
> This patch fixes a cross-version migration regression introduced
> by commit d1b4259f ("virtio-bus: Plug devices after features are
> negotiated").
>
> The problem is encountered when host's vhost backend does not support
> VIRTIO_F_VERSION_1, and migration is initiated from a v2.7 or prior
> machine with virtio-pci modern capabilities enabled to a v2.8 machine.
>
> In this case, modern capabilities get exposed to the guest by the source,
> whereas the target will detect version 1 is not supported so will only
> expose legacy capabilities.
>
> The problem is fixed by introducing a new "x-modern-broken" property,
> which is set in v2.7 and prior compatibility modes. Doing this, v2.7
> machine keeps its broken behaviour (enabling modern while version is
> not supported), and newer machines will behave correctly.
>
> Reported-by: Michael Roth <mdroth@linux.vnet.ibm.com>
> Suggested-by: Stefan Hajnoczi <stefanha@redhat.com>
> Cc: Michael S. Tsirkin <mst@redhat.com>
> Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
> Cc: Marcel Apfelbaum <marcel@redhat.com>
> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
>
> I'm not sure about the property name, let me know if you have better ideas.
> I didn't tested migration yet, but I wanted to share the patch while I test it.
> I tested booting v2.8 and v2.7 machines with !VERSION_1 and get expected result:
>  - v2.8: Virtio-pci probe succeed
>  - v2.7: Virtio-pci probe fails
>
> Thanks,
> Maxime
>
>  hw/virtio/virtio-pci.c | 4 +++-
>  hw/virtio/virtio-pci.h | 1 +
>  include/hw/compat.h    | 4 ++++
>  3 files changed, 8 insertions(+), 1 deletion(-)
>
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index 521ba0b..93f6b54 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -1580,7 +1580,8 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
>       * Virtio capabilities present without
>       * VIRTIO_F_VERSION_1 confuses guests
>       */
> -    if (!virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) {
> +    if (!proxy->modern_broken &&
> +            !virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) {
>          virtio_pci_disable_modern(proxy);
>
>          if (!legacy) {
> @@ -1852,6 +1853,7 @@ static Property virtio_pci_properties[] = {
>                      VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, false),
>      DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags,
>                      VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false),
> +    DEFINE_PROP_BOOL("x-modern-broken", VirtIOPCIProxy, modern_broken, false),
>      DEFINE_PROP_END_OF_LIST(),
>  };
>
> diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h
> index b2a996f..1dca223 100644
> --- a/hw/virtio/virtio-pci.h
> +++ b/hw/virtio/virtio-pci.h
> @@ -153,6 +153,7 @@ struct VirtIOPCIProxy {
>      int config_cap;
>      uint32_t flags;
>      bool disable_modern;
> +    bool modern_broken;
>      OnOffAuto disable_legacy;
>      uint32_t class_code;
>      uint32_t nvectors;
> diff --git a/include/hw/compat.h b/include/hw/compat.h
> index 0f06e11..fe11723 100644
> --- a/include/hw/compat.h
> +++ b/include/hw/compat.h
> @@ -18,6 +18,10 @@
>          .driver   = "intel-iommu",\
>          .property = "x-buggy-eim",\
>          .value    = "true",\
> +    },{\
> +        .driver   = "virtio-pci",\
> +        .property = "x-modern-broken",\
> +        .value    = "on",\
>      },
>
>  #define HW_COMPAT_2_6 \
>

Hi Maxime,
The patch looks good to me, I am neutral regarding the property name.

Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>

Thanks,
Marcel
Stefan Hajnoczi Dec. 14, 2016, 1:45 p.m. UTC | #7
On Wed, Dec 14, 2016 at 1:29 PM, Maxime Coquelin
<maxime.coquelin@redhat.com> wrote:
> On 12/14/2016 02:21 PM, Cornelia Huck wrote:
>>
>> On Wed, 14 Dec 2016 15:16:13 +0200
>> "Michael S. Tsirkin" <mst@redhat.com> wrote:
>>
>>> On Wed, Dec 14, 2016 at 02:12:10PM +0100, Cornelia Huck wrote:
>>>>
>>>> On Wed, 14 Dec 2016 13:52:37 +0100
>>>> Maxime Coquelin <maxime.coquelin@redhat.com> wrote:
>>>>
>>>>> This patch fixes a cross-version migration regression introduced
>>>>> by commit d1b4259f ("virtio-bus: Plug devices after features are
>>>>> negotiated").
>>>>>
>>>>> The problem is encountered when host's vhost backend does not support
>>>>> VIRTIO_F_VERSION_1, and migration is initiated from a v2.7 or prior
>>>>> machine with virtio-pci modern capabilities enabled to a v2.8 machine.
>>>>
>>>>
>>>> Wait, machine versions or qemu versions?
>>>>
>>>>>
>>>>> In this case, modern capabilities get exposed to the guest by the
>>>>> source,
>>>>> whereas the target will detect version 1 is not supported so will only
>>>>> expose legacy capabilities.
>>>>>
>>>>> The problem is fixed by introducing a new "x-modern-broken" property,
>>>>> which is set in v2.7 and prior compatibility modes. Doing this, v2.7
>>>>> machine keeps its broken behaviour (enabling modern while version is
>>>>> not supported), and newer machines will behave correctly.
>>>>>
>>>>> Reported-by: Michael Roth <mdroth@linux.vnet.ibm.com>
>>>>> Suggested-by: Stefan Hajnoczi <stefanha@redhat.com>
>>>>> Cc: Michael S. Tsirkin <mst@redhat.com>
>>>>> Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
>>>>> Cc: Marcel Apfelbaum <marcel@redhat.com>
>>>>> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
>>>>> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
>>>>> ---
>>>>>
>>>>> I'm not sure about the property name, let me know if you have better
>>>>> ideas.
>>>>> I didn't tested migration yet, but I wanted to share the patch while I
>>>>> test it.
>>>>> I tested booting v2.8 and v2.7 machines with !VERSION_1 and get
>>>>> expected result:
>>>>>  - v2.8: Virtio-pci probe succeed
>>>>>  - v2.7: Virtio-pci probe fails
>>>>>
>>>>> Thanks,
>>>>> Maxime
>>>>>
>>>>>  hw/virtio/virtio-pci.c | 4 +++-
>>>>>  hw/virtio/virtio-pci.h | 1 +
>>>>>  include/hw/compat.h    | 4 ++++
>>>>>  3 files changed, 8 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
>>>>> index 521ba0b..93f6b54 100644
>>>>> --- a/hw/virtio/virtio-pci.c
>>>>> +++ b/hw/virtio/virtio-pci.c
>>>>> @@ -1580,7 +1580,8 @@ static void virtio_pci_device_plugged(DeviceState
>>>>> *d, Error **errp)
>>>>>       * Virtio capabilities present without
>>>>>       * VIRTIO_F_VERSION_1 confuses guests
>>>>>       */
>>>>> -    if (!virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1))
>>>>> {
>>>>> +    if (!proxy->modern_broken &&
>>>>
>>>>
>>>> What you are de facto doing here is ignoring the features supported by
>>>> the backend. Call this ->ignore_backend_features or so?
>>>>
>>>>> +            !virtio_has_feature(vdev->host_features,
>>>>> VIRTIO_F_VERSION_1)) {
>>>>>          virtio_pci_disable_modern(proxy);
>>>>>
>>>>>          if (!legacy) {
>>>>> @@ -1852,6 +1853,7 @@ static Property virtio_pci_properties[] = {
>>>>>                      VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, false),
>>>>>      DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags,
>>>>>                      VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false),
>>>>> +    DEFINE_PROP_BOOL("x-modern-broken", VirtIOPCIProxy, modern_broken,
>>>>> false),
>>>>
>>>>
>>>> What about "x-ignore-backend-features"?
>>>
>>>
>>> It's just the capability that ignores that backend is legacy.
>>>
>>> x-cap-ignore-backend-legacy
>>>
>>> ?
>>
>>
>> Sounds good. And has the benefit that nobody will be tempted to set
>> that manually :)
>
>
> Thanks Michael & Cornelia, you're definitely better than me at naming
> things :)
>
> I'll send the v2 using this nameing as soon as I get migration case
> tested.

Once the property is renamed...

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Michael Roth Dec. 14, 2016, 3:08 p.m. UTC | #8
Quoting Maxime Coquelin (2016-12-14 06:52:37)
> This patch fixes a cross-version migration regression introduced
> by commit d1b4259f ("virtio-bus: Plug devices after features are
> negotiated").
> 
> The problem is encountered when host's vhost backend does not support
> VIRTIO_F_VERSION_1, and migration is initiated from a v2.7 or prior
> machine with virtio-pci modern capabilities enabled to a v2.8 machine.
> 
> In this case, modern capabilities get exposed to the guest by the source,
> whereas the target will detect version 1 is not supported so will only
> expose legacy capabilities.
> 
> The problem is fixed by introducing a new "x-modern-broken" property,
> which is set in v2.7 and prior compatibility modes. Doing this, v2.7
> machine keeps its broken behaviour (enabling modern while version is
> not supported), and newer machines will behave correctly.
> 
> Reported-by: Michael Roth <mdroth@linux.vnet.ibm.com>
> Suggested-by: Stefan Hajnoczi <stefanha@redhat.com>
> Cc: Michael S. Tsirkin <mst@redhat.com>
> Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
> Cc: Marcel Apfelbaum <marcel@redhat.com>
> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Tested-by: Michael Roth <mdroth@linux.vnet.ibm.com>

I can confirm this fixes the original issue I reported. I also did a
number of sanity runs with 2.7/2.8/pc-i440fx-2.{6,7,8} using various
combinations of disable-modern=true/false on hosts with/without virtio-1,
and some tests with pseries machines as well, and everything seems to
work.

Thanks for the quick fix!

> ---
> 
> I'm not sure about the property name, let me know if you have better ideas.
> I didn't tested migration yet, but I wanted to share the patch while I test it.
> I tested booting v2.8 and v2.7 machines with !VERSION_1 and get expected result:
>  - v2.8: Virtio-pci probe succeed
>  - v2.7: Virtio-pci probe fails
> 
> Thanks,
> Maxime
> 
>  hw/virtio/virtio-pci.c | 4 +++-
>  hw/virtio/virtio-pci.h | 1 +
>  include/hw/compat.h    | 4 ++++
>  3 files changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index 521ba0b..93f6b54 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -1580,7 +1580,8 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
>       * Virtio capabilities present without
>       * VIRTIO_F_VERSION_1 confuses guests
>       */
> -    if (!virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) {
> +    if (!proxy->modern_broken &&
> +            !virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) {
>          virtio_pci_disable_modern(proxy);
> 
>          if (!legacy) {
> @@ -1852,6 +1853,7 @@ static Property virtio_pci_properties[] = {
>                      VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, false),
>      DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags,
>                      VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false),
> +    DEFINE_PROP_BOOL("x-modern-broken", VirtIOPCIProxy, modern_broken, false),
>      DEFINE_PROP_END_OF_LIST(),
>  };
> 
> diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h
> index b2a996f..1dca223 100644
> --- a/hw/virtio/virtio-pci.h
> +++ b/hw/virtio/virtio-pci.h
> @@ -153,6 +153,7 @@ struct VirtIOPCIProxy {
>      int config_cap;
>      uint32_t flags;
>      bool disable_modern;
> +    bool modern_broken;
>      OnOffAuto disable_legacy;
>      uint32_t class_code;
>      uint32_t nvectors;
> diff --git a/include/hw/compat.h b/include/hw/compat.h
> index 0f06e11..fe11723 100644
> --- a/include/hw/compat.h
> +++ b/include/hw/compat.h
> @@ -18,6 +18,10 @@
>          .driver   = "intel-iommu",\
>          .property = "x-buggy-eim",\
>          .value    = "true",\
> +    },{\
> +        .driver   = "virtio-pci",\
> +        .property = "x-modern-broken",\
> +        .value    = "on",\
>      },
> 
>  #define HW_COMPAT_2_6 \
> -- 
> 2.9.3
>
Michael S. Tsirkin Dec. 14, 2016, 3:15 p.m. UTC | #9
On Wed, Dec 14, 2016 at 09:08:52AM -0600, Michael Roth wrote:
> Quoting Maxime Coquelin (2016-12-14 06:52:37)
> > This patch fixes a cross-version migration regression introduced
> > by commit d1b4259f ("virtio-bus: Plug devices after features are
> > negotiated").
> > 
> > The problem is encountered when host's vhost backend does not support
> > VIRTIO_F_VERSION_1, and migration is initiated from a v2.7 or prior
> > machine with virtio-pci modern capabilities enabled to a v2.8 machine.
> > 
> > In this case, modern capabilities get exposed to the guest by the source,
> > whereas the target will detect version 1 is not supported so will only
> > expose legacy capabilities.
> > 
> > The problem is fixed by introducing a new "x-modern-broken" property,
> > which is set in v2.7 and prior compatibility modes. Doing this, v2.7
> > machine keeps its broken behaviour (enabling modern while version is
> > not supported), and newer machines will behave correctly.
> > 
> > Reported-by: Michael Roth <mdroth@linux.vnet.ibm.com>
> > Suggested-by: Stefan Hajnoczi <stefanha@redhat.com>
> > Cc: Michael S. Tsirkin <mst@redhat.com>
> > Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
> > Cc: Marcel Apfelbaum <marcel@redhat.com>
> > Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> 
> Tested-by: Michael Roth <mdroth@linux.vnet.ibm.com>
> 
> I can confirm this fixes the original issue I reported. I also did a
> number of sanity runs with 2.7/2.8/pc-i440fx-2.{6,7,8} using various
> combinations of disable-modern=true/false on hosts with/without virtio-1,
> and some tests with pseries machines as well, and everything seems to
> work.
> 
> Thanks for the quick fix!

FYI what I think does not work is a recent kernel on 2.7
machine type and host without virtio 1.
But this is not new.

> > ---
> > 
> > I'm not sure about the property name, let me know if you have better ideas.
> > I didn't tested migration yet, but I wanted to share the patch while I test it.
> > I tested booting v2.8 and v2.7 machines with !VERSION_1 and get expected result:
> >  - v2.8: Virtio-pci probe succeed
> >  - v2.7: Virtio-pci probe fails
> > 
> > Thanks,
> > Maxime
> > 
> >  hw/virtio/virtio-pci.c | 4 +++-
> >  hw/virtio/virtio-pci.h | 1 +
> >  include/hw/compat.h    | 4 ++++
> >  3 files changed, 8 insertions(+), 1 deletion(-)
> > 
> > diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> > index 521ba0b..93f6b54 100644
> > --- a/hw/virtio/virtio-pci.c
> > +++ b/hw/virtio/virtio-pci.c
> > @@ -1580,7 +1580,8 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
> >       * Virtio capabilities present without
> >       * VIRTIO_F_VERSION_1 confuses guests
> >       */
> > -    if (!virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) {
> > +    if (!proxy->modern_broken &&
> > +            !virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) {
> >          virtio_pci_disable_modern(proxy);
> > 
> >          if (!legacy) {
> > @@ -1852,6 +1853,7 @@ static Property virtio_pci_properties[] = {
> >                      VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, false),
> >      DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags,
> >                      VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false),
> > +    DEFINE_PROP_BOOL("x-modern-broken", VirtIOPCIProxy, modern_broken, false),
> >      DEFINE_PROP_END_OF_LIST(),
> >  };
> > 
> > diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h
> > index b2a996f..1dca223 100644
> > --- a/hw/virtio/virtio-pci.h
> > +++ b/hw/virtio/virtio-pci.h
> > @@ -153,6 +153,7 @@ struct VirtIOPCIProxy {
> >      int config_cap;
> >      uint32_t flags;
> >      bool disable_modern;
> > +    bool modern_broken;
> >      OnOffAuto disable_legacy;
> >      uint32_t class_code;
> >      uint32_t nvectors;
> > diff --git a/include/hw/compat.h b/include/hw/compat.h
> > index 0f06e11..fe11723 100644
> > --- a/include/hw/compat.h
> > +++ b/include/hw/compat.h
> > @@ -18,6 +18,10 @@
> >          .driver   = "intel-iommu",\
> >          .property = "x-buggy-eim",\
> >          .value    = "true",\
> > +    },{\
> > +        .driver   = "virtio-pci",\
> > +        .property = "x-modern-broken",\
> > +        .value    = "on",\
> >      },
> > 
> >  #define HW_COMPAT_2_6 \
> > -- 
> > 2.9.3
> >
Maxime Coquelin Dec. 14, 2016, 3:20 p.m. UTC | #10
On 12/14/2016 04:15 PM, Michael S. Tsirkin wrote:
> On Wed, Dec 14, 2016 at 09:08:52AM -0600, Michael Roth wrote:
>> Quoting Maxime Coquelin (2016-12-14 06:52:37)
>>> This patch fixes a cross-version migration regression introduced
>>> by commit d1b4259f ("virtio-bus: Plug devices after features are
>>> negotiated").
>>>
>>> The problem is encountered when host's vhost backend does not support
>>> VIRTIO_F_VERSION_1, and migration is initiated from a v2.7 or prior
>>> machine with virtio-pci modern capabilities enabled to a v2.8 machine.
>>>
>>> In this case, modern capabilities get exposed to the guest by the source,
>>> whereas the target will detect version 1 is not supported so will only
>>> expose legacy capabilities.
>>>
>>> The problem is fixed by introducing a new "x-modern-broken" property,
>>> which is set in v2.7 and prior compatibility modes. Doing this, v2.7
>>> machine keeps its broken behaviour (enabling modern while version is
>>> not supported), and newer machines will behave correctly.
>>>
>>> Reported-by: Michael Roth <mdroth@linux.vnet.ibm.com>
>>> Suggested-by: Stefan Hajnoczi <stefanha@redhat.com>
>>> Cc: Michael S. Tsirkin <mst@redhat.com>
>>> Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
>>> Cc: Marcel Apfelbaum <marcel@redhat.com>
>>> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
>>> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
>>
>> Tested-by: Michael Roth <mdroth@linux.vnet.ibm.com>
>>
>> I can confirm this fixes the original issue I reported. I also did a
>> number of sanity runs with 2.7/2.8/pc-i440fx-2.{6,7,8} using various
>> combinations of disable-modern=true/false on hosts with/without virtio-1,
>> and some tests with pseries machines as well, and everything seems to
>> work.
>>
>> Thanks for the quick fix!
>
> FYI what I think does not work is a recent kernel on 2.7
> machine type and host without virtio 1.
> But this is not new.

I confirm, and as we discussed off-list, I will propose a kernel patch
to improve the situation, even if it will not fix current guests using
recent kernels.


>
>>> ---
>>>
>>> I'm not sure about the property name, let me know if you have better ideas.
>>> I didn't tested migration yet, but I wanted to share the patch while I test it.
>>> I tested booting v2.8 and v2.7 machines with !VERSION_1 and get expected result:
>>>  - v2.8: Virtio-pci probe succeed
>>>  - v2.7: Virtio-pci probe fails
>>>
>>> Thanks,
>>> Maxime
>>>
>>>  hw/virtio/virtio-pci.c | 4 +++-
>>>  hw/virtio/virtio-pci.h | 1 +
>>>  include/hw/compat.h    | 4 ++++
>>>  3 files changed, 8 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
>>> index 521ba0b..93f6b54 100644
>>> --- a/hw/virtio/virtio-pci.c
>>> +++ b/hw/virtio/virtio-pci.c
>>> @@ -1580,7 +1580,8 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
>>>       * Virtio capabilities present without
>>>       * VIRTIO_F_VERSION_1 confuses guests
>>>       */
>>> -    if (!virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) {
>>> +    if (!proxy->modern_broken &&
>>> +            !virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) {
>>>          virtio_pci_disable_modern(proxy);
>>>
>>>          if (!legacy) {
>>> @@ -1852,6 +1853,7 @@ static Property virtio_pci_properties[] = {
>>>                      VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, false),
>>>      DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags,
>>>                      VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false),
>>> +    DEFINE_PROP_BOOL("x-modern-broken", VirtIOPCIProxy, modern_broken, false),
>>>      DEFINE_PROP_END_OF_LIST(),
>>>  };
>>>
>>> diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h
>>> index b2a996f..1dca223 100644
>>> --- a/hw/virtio/virtio-pci.h
>>> +++ b/hw/virtio/virtio-pci.h
>>> @@ -153,6 +153,7 @@ struct VirtIOPCIProxy {
>>>      int config_cap;
>>>      uint32_t flags;
>>>      bool disable_modern;
>>> +    bool modern_broken;
>>>      OnOffAuto disable_legacy;
>>>      uint32_t class_code;
>>>      uint32_t nvectors;
>>> diff --git a/include/hw/compat.h b/include/hw/compat.h
>>> index 0f06e11..fe11723 100644
>>> --- a/include/hw/compat.h
>>> +++ b/include/hw/compat.h
>>> @@ -18,6 +18,10 @@
>>>          .driver   = "intel-iommu",\
>>>          .property = "x-buggy-eim",\
>>>          .value    = "true",\
>>> +    },{\
>>> +        .driver   = "virtio-pci",\
>>> +        .property = "x-modern-broken",\
>>> +        .value    = "on",\
>>>      },
>>>
>>>  #define HW_COMPAT_2_6 \
>>> --
>>> 2.9.3
>>>
Michael Roth Dec. 14, 2016, 4:02 p.m. UTC | #11
Quoting Michael S. Tsirkin (2016-12-14 09:15:38)
> On Wed, Dec 14, 2016 at 09:08:52AM -0600, Michael Roth wrote:
> > Quoting Maxime Coquelin (2016-12-14 06:52:37)
> > > This patch fixes a cross-version migration regression introduced
> > > by commit d1b4259f ("virtio-bus: Plug devices after features are
> > > negotiated").
> > > 
> > > The problem is encountered when host's vhost backend does not support
> > > VIRTIO_F_VERSION_1, and migration is initiated from a v2.7 or prior
> > > machine with virtio-pci modern capabilities enabled to a v2.8 machine.
> > > 
> > > In this case, modern capabilities get exposed to the guest by the source,
> > > whereas the target will detect version 1 is not supported so will only
> > > expose legacy capabilities.
> > > 
> > > The problem is fixed by introducing a new "x-modern-broken" property,
> > > which is set in v2.7 and prior compatibility modes. Doing this, v2.7
> > > machine keeps its broken behaviour (enabling modern while version is
> > > not supported), and newer machines will behave correctly.
> > > 
> > > Reported-by: Michael Roth <mdroth@linux.vnet.ibm.com>
> > > Suggested-by: Stefan Hajnoczi <stefanha@redhat.com>
> > > Cc: Michael S. Tsirkin <mst@redhat.com>
> > > Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
> > > Cc: Marcel Apfelbaum <marcel@redhat.com>
> > > Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > > Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> > 
> > Tested-by: Michael Roth <mdroth@linux.vnet.ibm.com>
> > 
> > I can confirm this fixes the original issue I reported. I also did a
> > number of sanity runs with 2.7/2.8/pc-i440fx-2.{6,7,8} using various
> > combinations of disable-modern=true/false on hosts with/without virtio-1,
> > and some tests with pseries machines as well, and everything seems to
> > work.
> > 
> > Thanks for the quick fix!
> 
> FYI what I think does not work is a recent kernel on 2.7
> machine type and host without virtio 1.
> But this is not new.

To clarify I was only testing migration compatibility, I assume virtio
on 2.7 machines is still broken for the configuration you mentioned.

The migration tests I ran on the virtio-1 host do cover networking over
a virtio-net device before/after migration with reboots before/after
migration as well though, and the guest in those cases had a 4.8 kernel,
so I think the sanity checks I mentioned also apply for confirming
virtio-net probe is succeeding in the guest.

The non-virtio-1 runs are being done on my local machine and the tests
in that case are a bit more basic and don't involve actively testing
networking. I'll try some manual tests to check this. I guess the main
things to confirm on that front are that after the patch virtio probing:

  pc-i440fx-2.6, defaults -> succeeds
  pc-i440fx-2.6, disable-modern=true -> succeeds
  pc-i440fx-2.6, disable-modern=false -> fails

  pc-i440fx-2.7, defaults -> fails
  pc-i440fx-2.7, disable-modern=true -> succeeds
  pc-i440fx-2.7, disable-modern=false -> fails

  pc-i440fx-2.8, defaults -> succeeds
  pc-i440fx-2.8, disable-modern=true -> succeeds
  pc-i440fx-2.8, disable-modern=false -> succeeds

> 
> > > ---
> > > 
> > > I'm not sure about the property name, let me know if you have better ideas.
> > > I didn't tested migration yet, but I wanted to share the patch while I test it.
> > > I tested booting v2.8 and v2.7 machines with !VERSION_1 and get expected result:
> > >  - v2.8: Virtio-pci probe succeed
> > >  - v2.7: Virtio-pci probe fails
> > > 
> > > Thanks,
> > > Maxime
> > > 
> > >  hw/virtio/virtio-pci.c | 4 +++-
> > >  hw/virtio/virtio-pci.h | 1 +
> > >  include/hw/compat.h    | 4 ++++
> > >  3 files changed, 8 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> > > index 521ba0b..93f6b54 100644
> > > --- a/hw/virtio/virtio-pci.c
> > > +++ b/hw/virtio/virtio-pci.c
> > > @@ -1580,7 +1580,8 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
> > >       * Virtio capabilities present without
> > >       * VIRTIO_F_VERSION_1 confuses guests
> > >       */
> > > -    if (!virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) {
> > > +    if (!proxy->modern_broken &&
> > > +            !virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) {
> > >          virtio_pci_disable_modern(proxy);
> > > 
> > >          if (!legacy) {
> > > @@ -1852,6 +1853,7 @@ static Property virtio_pci_properties[] = {
> > >                      VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, false),
> > >      DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags,
> > >                      VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false),
> > > +    DEFINE_PROP_BOOL("x-modern-broken", VirtIOPCIProxy, modern_broken, false),
> > >      DEFINE_PROP_END_OF_LIST(),
> > >  };
> > > 
> > > diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h
> > > index b2a996f..1dca223 100644
> > > --- a/hw/virtio/virtio-pci.h
> > > +++ b/hw/virtio/virtio-pci.h
> > > @@ -153,6 +153,7 @@ struct VirtIOPCIProxy {
> > >      int config_cap;
> > >      uint32_t flags;
> > >      bool disable_modern;
> > > +    bool modern_broken;
> > >      OnOffAuto disable_legacy;
> > >      uint32_t class_code;
> > >      uint32_t nvectors;
> > > diff --git a/include/hw/compat.h b/include/hw/compat.h
> > > index 0f06e11..fe11723 100644
> > > --- a/include/hw/compat.h
> > > +++ b/include/hw/compat.h
> > > @@ -18,6 +18,10 @@
> > >          .driver   = "intel-iommu",\
> > >          .property = "x-buggy-eim",\
> > >          .value    = "true",\
> > > +    },{\
> > > +        .driver   = "virtio-pci",\
> > > +        .property = "x-modern-broken",\
> > > +        .value    = "on",\
> > >      },
> > > 
> > >  #define HW_COMPAT_2_6 \
> > > -- 
> > > 2.9.3
> > > 
>
Michael Roth Dec. 14, 2016, 4:53 p.m. UTC | #12
Quoting Michael Roth (2016-12-14 10:02:15)
> Quoting Michael S. Tsirkin (2016-12-14 09:15:38)
> > On Wed, Dec 14, 2016 at 09:08:52AM -0600, Michael Roth wrote:
> > > Quoting Maxime Coquelin (2016-12-14 06:52:37)
> > > > This patch fixes a cross-version migration regression introduced
> > > > by commit d1b4259f ("virtio-bus: Plug devices after features are
> > > > negotiated").
> > > > 
> > > > The problem is encountered when host's vhost backend does not support
> > > > VIRTIO_F_VERSION_1, and migration is initiated from a v2.7 or prior
> > > > machine with virtio-pci modern capabilities enabled to a v2.8 machine.
> > > > 
> > > > In this case, modern capabilities get exposed to the guest by the source,
> > > > whereas the target will detect version 1 is not supported so will only
> > > > expose legacy capabilities.
> > > > 
> > > > The problem is fixed by introducing a new "x-modern-broken" property,
> > > > which is set in v2.7 and prior compatibility modes. Doing this, v2.7
> > > > machine keeps its broken behaviour (enabling modern while version is
> > > > not supported), and newer machines will behave correctly.
> > > > 
> > > > Reported-by: Michael Roth <mdroth@linux.vnet.ibm.com>
> > > > Suggested-by: Stefan Hajnoczi <stefanha@redhat.com>
> > > > Cc: Michael S. Tsirkin <mst@redhat.com>
> > > > Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
> > > > Cc: Marcel Apfelbaum <marcel@redhat.com>
> > > > Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > > > Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> > > 
> > > Tested-by: Michael Roth <mdroth@linux.vnet.ibm.com>
> > > 
> > > I can confirm this fixes the original issue I reported. I also did a
> > > number of sanity runs with 2.7/2.8/pc-i440fx-2.{6,7,8} using various
> > > combinations of disable-modern=true/false on hosts with/without virtio-1,
> > > and some tests with pseries machines as well, and everything seems to
> > > work.
> > > 
> > > Thanks for the quick fix!
> > 
> > FYI what I think does not work is a recent kernel on 2.7
> > machine type and host without virtio 1.
> > But this is not new.
> 
> To clarify I was only testing migration compatibility, I assume virtio
> on 2.7 machines is still broken for the configuration you mentioned.
> 
> The migration tests I ran on the virtio-1 host do cover networking over
> a virtio-net device before/after migration with reboots before/after
> migration as well though, and the guest in those cases had a 4.8 kernel,
> so I think the sanity checks I mentioned also apply for confirming
> virtio-net probe is succeeding in the guest.
> 
> The non-virtio-1 runs are being done on my local machine and the tests
> in that case are a bit more basic and don't involve actively testing
> networking. I'll try some manual tests to check this. I guess the main
> things to confirm on that front are that after the patch virtio probing:
> 
>   pc-i440fx-2.6, defaults -> succeeds
>   pc-i440fx-2.6, disable-modern=true -> succeeds
>   pc-i440fx-2.6, disable-modern=false -> fails
> 
>   pc-i440fx-2.7, defaults -> fails
>   pc-i440fx-2.7, disable-modern=true -> succeeds
>   pc-i440fx-2.7, disable-modern=false -> fails
> 
>   pc-i440fx-2.8, defaults -> succeeds
>   pc-i440fx-2.8, disable-modern=true -> succeeds
>   pc-i440fx-2.8, disable-modern=false -> succeeds

I wasn't able to test disable-modern with pc-i440fx-2.6 due to the issue
being fixes by proposed patch "machine: Convert abstract typename on
compat_props to subclass names", but I think the rest of the cases align
with expectations:

2.6, defaults: succeeds

2.7, defaults: fails
2.7, disable-modern=true: succeeds
2.7, disable-modern=false: fails

2.8, defaults: succeeds
2.7, disable-modern=true: succeeds
2.7, disable-modern=false: succeeds

> 
> > 
> > > > ---
> > > > 
> > > > I'm not sure about the property name, let me know if you have better ideas.
> > > > I didn't tested migration yet, but I wanted to share the patch while I test it.
> > > > I tested booting v2.8 and v2.7 machines with !VERSION_1 and get expected result:
> > > >  - v2.8: Virtio-pci probe succeed
> > > >  - v2.7: Virtio-pci probe fails
> > > > 
> > > > Thanks,
> > > > Maxime
> > > > 
> > > >  hw/virtio/virtio-pci.c | 4 +++-
> > > >  hw/virtio/virtio-pci.h | 1 +
> > > >  include/hw/compat.h    | 4 ++++
> > > >  3 files changed, 8 insertions(+), 1 deletion(-)
> > > > 
> > > > diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> > > > index 521ba0b..93f6b54 100644
> > > > --- a/hw/virtio/virtio-pci.c
> > > > +++ b/hw/virtio/virtio-pci.c
> > > > @@ -1580,7 +1580,8 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
> > > >       * Virtio capabilities present without
> > > >       * VIRTIO_F_VERSION_1 confuses guests
> > > >       */
> > > > -    if (!virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) {
> > > > +    if (!proxy->modern_broken &&
> > > > +            !virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) {
> > > >          virtio_pci_disable_modern(proxy);
> > > > 
> > > >          if (!legacy) {
> > > > @@ -1852,6 +1853,7 @@ static Property virtio_pci_properties[] = {
> > > >                      VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, false),
> > > >      DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags,
> > > >                      VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false),
> > > > +    DEFINE_PROP_BOOL("x-modern-broken", VirtIOPCIProxy, modern_broken, false),
> > > >      DEFINE_PROP_END_OF_LIST(),
> > > >  };
> > > > 
> > > > diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h
> > > > index b2a996f..1dca223 100644
> > > > --- a/hw/virtio/virtio-pci.h
> > > > +++ b/hw/virtio/virtio-pci.h
> > > > @@ -153,6 +153,7 @@ struct VirtIOPCIProxy {
> > > >      int config_cap;
> > > >      uint32_t flags;
> > > >      bool disable_modern;
> > > > +    bool modern_broken;
> > > >      OnOffAuto disable_legacy;
> > > >      uint32_t class_code;
> > > >      uint32_t nvectors;
> > > > diff --git a/include/hw/compat.h b/include/hw/compat.h
> > > > index 0f06e11..fe11723 100644
> > > > --- a/include/hw/compat.h
> > > > +++ b/include/hw/compat.h
> > > > @@ -18,6 +18,10 @@
> > > >          .driver   = "intel-iommu",\
> > > >          .property = "x-buggy-eim",\
> > > >          .value    = "true",\
> > > > +    },{\
> > > > +        .driver   = "virtio-pci",\
> > > > +        .property = "x-modern-broken",\
> > > > +        .value    = "on",\
> > > >      },
> > > > 
> > > >  #define HW_COMPAT_2_6 \
> > > > -- 
> > > > 2.9.3
> > > > 
> > 
> 
>
Michael Roth Dec. 14, 2016, 4:56 p.m. UTC | #13
Quoting Michael Roth (2016-12-14 10:53:41)
> Quoting Michael Roth (2016-12-14 10:02:15)
> > Quoting Michael S. Tsirkin (2016-12-14 09:15:38)
> > > On Wed, Dec 14, 2016 at 09:08:52AM -0600, Michael Roth wrote:
> > > > Quoting Maxime Coquelin (2016-12-14 06:52:37)
> > > > > This patch fixes a cross-version migration regression introduced
> > > > > by commit d1b4259f ("virtio-bus: Plug devices after features are
> > > > > negotiated").
> > > > > 
> > > > > The problem is encountered when host's vhost backend does not support
> > > > > VIRTIO_F_VERSION_1, and migration is initiated from a v2.7 or prior
> > > > > machine with virtio-pci modern capabilities enabled to a v2.8 machine.
> > > > > 
> > > > > In this case, modern capabilities get exposed to the guest by the source,
> > > > > whereas the target will detect version 1 is not supported so will only
> > > > > expose legacy capabilities.
> > > > > 
> > > > > The problem is fixed by introducing a new "x-modern-broken" property,
> > > > > which is set in v2.7 and prior compatibility modes. Doing this, v2.7
> > > > > machine keeps its broken behaviour (enabling modern while version is
> > > > > not supported), and newer machines will behave correctly.
> > > > > 
> > > > > Reported-by: Michael Roth <mdroth@linux.vnet.ibm.com>
> > > > > Suggested-by: Stefan Hajnoczi <stefanha@redhat.com>
> > > > > Cc: Michael S. Tsirkin <mst@redhat.com>
> > > > > Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
> > > > > Cc: Marcel Apfelbaum <marcel@redhat.com>
> > > > > Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > > > > Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> > > > 
> > > > Tested-by: Michael Roth <mdroth@linux.vnet.ibm.com>
> > > > 
> > > > I can confirm this fixes the original issue I reported. I also did a
> > > > number of sanity runs with 2.7/2.8/pc-i440fx-2.{6,7,8} using various
> > > > combinations of disable-modern=true/false on hosts with/without virtio-1,
> > > > and some tests with pseries machines as well, and everything seems to
> > > > work.
> > > > 
> > > > Thanks for the quick fix!
> > > 
> > > FYI what I think does not work is a recent kernel on 2.7
> > > machine type and host without virtio 1.
> > > But this is not new.
> > 
> > To clarify I was only testing migration compatibility, I assume virtio
> > on 2.7 machines is still broken for the configuration you mentioned.
> > 
> > The migration tests I ran on the virtio-1 host do cover networking over
> > a virtio-net device before/after migration with reboots before/after
> > migration as well though, and the guest in those cases had a 4.8 kernel,
> > so I think the sanity checks I mentioned also apply for confirming
> > virtio-net probe is succeeding in the guest.
> > 
> > The non-virtio-1 runs are being done on my local machine and the tests
> > in that case are a bit more basic and don't involve actively testing
> > networking. I'll try some manual tests to check this. I guess the main
> > things to confirm on that front are that after the patch virtio probing:
> > 
> >   pc-i440fx-2.6, defaults -> succeeds
> >   pc-i440fx-2.6, disable-modern=true -> succeeds
> >   pc-i440fx-2.6, disable-modern=false -> fails
> > 
> >   pc-i440fx-2.7, defaults -> fails
> >   pc-i440fx-2.7, disable-modern=true -> succeeds
> >   pc-i440fx-2.7, disable-modern=false -> fails
> > 
> >   pc-i440fx-2.8, defaults -> succeeds
> >   pc-i440fx-2.8, disable-modern=true -> succeeds
> >   pc-i440fx-2.8, disable-modern=false -> succeeds
> 
> I wasn't able to test disable-modern with pc-i440fx-2.6 due to the issue
> being fixes by proposed patch "machine: Convert abstract typename on
> compat_props to subclass names", but I think the rest of the cases align
> with expectations:
> 
> 2.6, defaults: succeeds
> 
> 2.7, defaults: fails
> 2.7, disable-modern=true: succeeds
> 2.7, disable-modern=false: fails
> 
> 2.8, defaults: succeeds
> 2.7, disable-modern=true: succeeds
> 2.7, disable-modern=false: succeeds

Typo on the latter 2, these were for pc-i440fx-2.8 as well.

> 
> > 
> > > 
> > > > > ---
> > > > > 
> > > > > I'm not sure about the property name, let me know if you have better ideas.
> > > > > I didn't tested migration yet, but I wanted to share the patch while I test it.
> > > > > I tested booting v2.8 and v2.7 machines with !VERSION_1 and get expected result:
> > > > >  - v2.8: Virtio-pci probe succeed
> > > > >  - v2.7: Virtio-pci probe fails
> > > > > 
> > > > > Thanks,
> > > > > Maxime
> > > > > 
> > > > >  hw/virtio/virtio-pci.c | 4 +++-
> > > > >  hw/virtio/virtio-pci.h | 1 +
> > > > >  include/hw/compat.h    | 4 ++++
> > > > >  3 files changed, 8 insertions(+), 1 deletion(-)
> > > > > 
> > > > > diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> > > > > index 521ba0b..93f6b54 100644
> > > > > --- a/hw/virtio/virtio-pci.c
> > > > > +++ b/hw/virtio/virtio-pci.c
> > > > > @@ -1580,7 +1580,8 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
> > > > >       * Virtio capabilities present without
> > > > >       * VIRTIO_F_VERSION_1 confuses guests
> > > > >       */
> > > > > -    if (!virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) {
> > > > > +    if (!proxy->modern_broken &&
> > > > > +            !virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) {
> > > > >          virtio_pci_disable_modern(proxy);
> > > > > 
> > > > >          if (!legacy) {
> > > > > @@ -1852,6 +1853,7 @@ static Property virtio_pci_properties[] = {
> > > > >                      VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, false),
> > > > >      DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags,
> > > > >                      VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false),
> > > > > +    DEFINE_PROP_BOOL("x-modern-broken", VirtIOPCIProxy, modern_broken, false),
> > > > >      DEFINE_PROP_END_OF_LIST(),
> > > > >  };
> > > > > 
> > > > > diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h
> > > > > index b2a996f..1dca223 100644
> > > > > --- a/hw/virtio/virtio-pci.h
> > > > > +++ b/hw/virtio/virtio-pci.h
> > > > > @@ -153,6 +153,7 @@ struct VirtIOPCIProxy {
> > > > >      int config_cap;
> > > > >      uint32_t flags;
> > > > >      bool disable_modern;
> > > > > +    bool modern_broken;
> > > > >      OnOffAuto disable_legacy;
> > > > >      uint32_t class_code;
> > > > >      uint32_t nvectors;
> > > > > diff --git a/include/hw/compat.h b/include/hw/compat.h
> > > > > index 0f06e11..fe11723 100644
> > > > > --- a/include/hw/compat.h
> > > > > +++ b/include/hw/compat.h
> > > > > @@ -18,6 +18,10 @@
> > > > >          .driver   = "intel-iommu",\
> > > > >          .property = "x-buggy-eim",\
> > > > >          .value    = "true",\
> > > > > +    },{\
> > > > > +        .driver   = "virtio-pci",\
> > > > > +        .property = "x-modern-broken",\
> > > > > +        .value    = "on",\
> > > > >      },
> > > > > 
> > > > >  #define HW_COMPAT_2_6 \
> > > > > -- 
> > > > > 2.9.3
> > > > > 
> > > 
> > 
> >
Maxime Coquelin Dec. 14, 2016, 4:57 p.m. UTC | #14
On 12/14/2016 05:56 PM, Michael Roth wrote:
> Quoting Michael Roth (2016-12-14 10:53:41)
>> > Quoting Michael Roth (2016-12-14 10:02:15)
>>> > > Quoting Michael S. Tsirkin (2016-12-14 09:15:38)
>>>> > > > On Wed, Dec 14, 2016 at 09:08:52AM -0600, Michael Roth wrote:
>>>>> > > > > Quoting Maxime Coquelin (2016-12-14 06:52:37)
>>>>>> > > > > > This patch fixes a cross-version migration regression introduced
>>>>>> > > > > > by commit d1b4259f ("virtio-bus: Plug devices after features are
>>>>>> > > > > > negotiated").
>>>>>> > > > > >
>>>>>> > > > > > The problem is encountered when host's vhost backend does not support
>>>>>> > > > > > VIRTIO_F_VERSION_1, and migration is initiated from a v2.7 or prior
>>>>>> > > > > > machine with virtio-pci modern capabilities enabled to a v2.8 machine.
>>>>>> > > > > >
>>>>>> > > > > > In this case, modern capabilities get exposed to the guest by the source,
>>>>>> > > > > > whereas the target will detect version 1 is not supported so will only
>>>>>> > > > > > expose legacy capabilities.
>>>>>> > > > > >
>>>>>> > > > > > The problem is fixed by introducing a new "x-modern-broken" property,
>>>>>> > > > > > which is set in v2.7 and prior compatibility modes. Doing this, v2.7
>>>>>> > > > > > machine keeps its broken behaviour (enabling modern while version is
>>>>>> > > > > > not supported), and newer machines will behave correctly.
>>>>>> > > > > >
>>>>>> > > > > > Reported-by: Michael Roth <mdroth@linux.vnet.ibm.com>
>>>>>> > > > > > Suggested-by: Stefan Hajnoczi <stefanha@redhat.com>
>>>>>> > > > > > Cc: Michael S. Tsirkin <mst@redhat.com>
>>>>>> > > > > > Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
>>>>>> > > > > > Cc: Marcel Apfelbaum <marcel@redhat.com>
>>>>>> > > > > > Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
>>>>>> > > > > > Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
>>>>> > > > >
>>>>> > > > > Tested-by: Michael Roth <mdroth@linux.vnet.ibm.com>
>>>>> > > > >
>>>>> > > > > I can confirm this fixes the original issue I reported. I also did a
>>>>> > > > > number of sanity runs with 2.7/2.8/pc-i440fx-2.{6,7,8} using various
>>>>> > > > > combinations of disable-modern=true/false on hosts with/without virtio-1,
>>>>> > > > > and some tests with pseries machines as well, and everything seems to
>>>>> > > > > work.
>>>>> > > > >
>>>>> > > > > Thanks for the quick fix!
>>>> > > >
>>>> > > > FYI what I think does not work is a recent kernel on 2.7
>>>> > > > machine type and host without virtio 1.
>>>> > > > But this is not new.
>>> > >
>>> > > To clarify I was only testing migration compatibility, I assume virtio
>>> > > on 2.7 machines is still broken for the configuration you mentioned.
>>> > >
>>> > > The migration tests I ran on the virtio-1 host do cover networking over
>>> > > a virtio-net device before/after migration with reboots before/after
>>> > > migration as well though, and the guest in those cases had a 4.8 kernel,
>>> > > so I think the sanity checks I mentioned also apply for confirming
>>> > > virtio-net probe is succeeding in the guest.
>>> > >
>>> > > The non-virtio-1 runs are being done on my local machine and the tests
>>> > > in that case are a bit more basic and don't involve actively testing
>>> > > networking. I'll try some manual tests to check this. I guess the main
>>> > > things to confirm on that front are that after the patch virtio probing:
>>> > >
>>> > >   pc-i440fx-2.6, defaults -> succeeds
>>> > >   pc-i440fx-2.6, disable-modern=true -> succeeds
>>> > >   pc-i440fx-2.6, disable-modern=false -> fails
>>> > >
>>> > >   pc-i440fx-2.7, defaults -> fails
>>> > >   pc-i440fx-2.7, disable-modern=true -> succeeds
>>> > >   pc-i440fx-2.7, disable-modern=false -> fails
>>> > >
>>> > >   pc-i440fx-2.8, defaults -> succeeds
>>> > >   pc-i440fx-2.8, disable-modern=true -> succeeds
>>> > >   pc-i440fx-2.8, disable-modern=false -> succeeds
>> >
>> > I wasn't able to test disable-modern with pc-i440fx-2.6 due to the issue
>> > being fixes by proposed patch "machine: Convert abstract typename on
>> > compat_props to subclass names", but I think the rest of the cases align
>> > with expectations:
>> >
>> > 2.6, defaults: succeeds
>> >
>> > 2.7, defaults: fails
>> > 2.7, disable-modern=true: succeeds
>> > 2.7, disable-modern=false: fails
>> >
>> > 2.8, defaults: succeeds
>> > 2.7, disable-modern=true: succeeds
>> > 2.7, disable-modern=false: succeeds
> Typo on the latter 2, these were for pc-i440fx-2.8 as well.

Thanks Michael, I confirm this is in line with expectations.

  - Maxime
diff mbox

Patch

diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 521ba0b..93f6b54 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1580,7 +1580,8 @@  static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
      * Virtio capabilities present without
      * VIRTIO_F_VERSION_1 confuses guests
      */
-    if (!virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) {
+    if (!proxy->modern_broken &&
+            !virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) {
         virtio_pci_disable_modern(proxy);
 
         if (!legacy) {
@@ -1852,6 +1853,7 @@  static Property virtio_pci_properties[] = {
                     VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, false),
     DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags,
                     VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false),
+    DEFINE_PROP_BOOL("x-modern-broken", VirtIOPCIProxy, modern_broken, false),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h
index b2a996f..1dca223 100644
--- a/hw/virtio/virtio-pci.h
+++ b/hw/virtio/virtio-pci.h
@@ -153,6 +153,7 @@  struct VirtIOPCIProxy {
     int config_cap;
     uint32_t flags;
     bool disable_modern;
+    bool modern_broken;
     OnOffAuto disable_legacy;
     uint32_t class_code;
     uint32_t nvectors;
diff --git a/include/hw/compat.h b/include/hw/compat.h
index 0f06e11..fe11723 100644
--- a/include/hw/compat.h
+++ b/include/hw/compat.h
@@ -18,6 +18,10 @@ 
         .driver   = "intel-iommu",\
         .property = "x-buggy-eim",\
         .value    = "true",\
+    },{\
+        .driver   = "virtio-pci",\
+        .property = "x-modern-broken",\
+        .value    = "on",\
     },
 
 #define HW_COMPAT_2_6 \