diff mbox series

[v1,20/23] xen platform: unplug ahci object

Message ID 9b8183903cbf20db4e2f0dafda9e0ed271a86a8e.1687278381.git.jupham125@gmail.com
State New
Headers show
Series Q35 support for Xen | expand

Commit Message

Joel Upham June 20, 2023, 5:24 p.m. UTC
This will unplug the ahci device when the Xen driver calls for an unplug.
This has been tested to work in linux and Windows guests.
When q35 is detected, we will remove the ahci controller
with the hard disks.  In the libxl config, cdrom devices
are put on a seperate ahci controller. This allows for 6 cdrom
devices to be added, and 6 qemu hard disks.


Signed-off-by: Joel Upham <jupham125@gmail.com>
---
 hw/i386/xen/xen_platform.c | 19 ++++++++++++++++++-
 hw/pci/pci.c               | 17 +++++++++++++++++
 include/hw/pci/pci.h       |  3 +++
 3 files changed, 38 insertions(+), 1 deletion(-)

Comments

Bernhard Beschow June 22, 2023, 5:40 a.m. UTC | #1
Am 20. Juni 2023 17:24:54 UTC schrieb Joel Upham <jupham125@gmail.com>:
>This will unplug the ahci device when the Xen driver calls for an unplug.
>This has been tested to work in linux and Windows guests.
>When q35 is detected, we will remove the ahci controller
>with the hard disks.  In the libxl config, cdrom devices
>are put on a seperate ahci controller. This allows for 6 cdrom
>devices to be added, and 6 qemu hard disks.

Does this also work with KVM Xen emulation? If so, the QEMU manual should be updated accordingly in this patch since it explicitly rules out Q35 due to missing AHCI unplug: https://gitlab.com/qemu-project/qemu/-/blob/stable-8.0/docs/system/i386/xen.rst?plain=1&ref_type=heads#L51

Best regards,
Bernhard

>
>
>Signed-off-by: Joel Upham <jupham125@gmail.com>
>---
> hw/i386/xen/xen_platform.c | 19 ++++++++++++++++++-
> hw/pci/pci.c               | 17 +++++++++++++++++
> include/hw/pci/pci.h       |  3 +++
> 3 files changed, 38 insertions(+), 1 deletion(-)
>
>diff --git a/hw/i386/xen/xen_platform.c b/hw/i386/xen/xen_platform.c
>index 57f1d742c1..0375337222 100644
>--- a/hw/i386/xen/xen_platform.c
>+++ b/hw/i386/xen/xen_platform.c
>@@ -34,6 +34,7 @@
> #include "sysemu/block-backend.h"
> #include "qemu/error-report.h"
> #include "qemu/module.h"
>+#include "include/hw/i386/pc.h"
> #include "qom/object.h"
> 
> #ifdef CONFIG_XEN
>@@ -223,6 +224,12 @@ static void unplug_disks(PCIBus *b, PCIDevice *d, void *opaque)
>         if (flags & UNPLUG_NVME_DISKS) {
>             object_unparent(OBJECT(d));
>         }
>+        break;
>+
>+    case PCI_CLASS_STORAGE_SATA:
>+	if (!aux) {
>+            object_unparent(OBJECT(d));
>+        }
> 
>     default:
>         break;
>@@ -231,7 +238,17 @@ static void unplug_disks(PCIBus *b, PCIDevice *d, void *opaque)
> 
> static void pci_unplug_disks(PCIBus *bus, uint32_t flags)
> {
>-    pci_for_each_device(bus, 0, unplug_disks, &flags);
>+    PCIBus *q35 = find_q35();
>+    if (q35) {
>+        /* When q35 is detected, we will remove the ahci controller
>+	 * with the hard disks.  In the libxl config, cdrom devices
>+	 * are put on a seperate ahci controller. This allows for 6 cdrom
>+	 * devices to be added, and 6 qemu hard disks.
>+	 */
>+        pci_function_for_one_bus(bus, unplug_disks, &flags);
>+    } else {
>+        pci_for_each_device(bus, 0, unplug_disks, &flags);
>+    }
> }
> 
> static void platform_fixed_ioport_writew(void *opaque, uint32_t addr, uint32_t val)
>diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>index 1cc7c89036..8eac3d751a 100644
>--- a/hw/pci/pci.c
>+++ b/hw/pci/pci.c
>@@ -1815,6 +1815,23 @@ void pci_for_each_device_reverse(PCIBus *bus, int bus_num,
>     }
> }
> 
>+void pci_function_for_one_bus(PCIBus *bus,
>+                          void (*fn)(PCIBus *b, PCIDevice *d, void *opaque),
>+                          void *opaque)
>+{
>+    bus = pci_find_bus_nr(bus, 0);
>+
>+    if (bus) {
>+        PCIDevice *d;
>+
>+        d = bus->devices[PCI_DEVFN(4,0)];
>+        if (d) {
>+            fn(bus, d, opaque);
>+            return;
>+        }
>+    }
>+}
>+
> void pci_for_each_device_under_bus(PCIBus *bus,
>                                    pci_bus_dev_fn fn, void *opaque)
> {
>diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
>index e6d0574a29..c53e21082a 100644
>--- a/include/hw/pci/pci.h
>+++ b/include/hw/pci/pci.h
>@@ -343,6 +343,9 @@ void pci_for_each_device_under_bus(PCIBus *bus,
> void pci_for_each_device_under_bus_reverse(PCIBus *bus,
>                                            pci_bus_dev_fn fn,
>                                            void *opaque);
>+void pci_function_for_one_bus(PCIBus *bus,
>+                         void (*fn)(PCIBus *bus, PCIDevice *d, void *opaque),
>+                         void *opaque);
> void pci_for_each_bus_depth_first(PCIBus *bus, pci_bus_ret_fn begin,
>                                   pci_bus_fn end, void *parent_state);
> PCIDevice *pci_get_function_0(PCIDevice *pci_dev);
David Woodhouse Oct. 19, 2023, 12:37 p.m. UTC | #2
On Thu, 2023-06-22 at 05:40 +0000, Bernhard Beschow wrote:
> Am 20. Juni 2023 17:24:54 UTC schrieb Joel Upham <jupham125@gmail.com>:
> > This will unplug the ahci device when the Xen driver calls for an unplug.
> > This has been tested to work in linux and Windows guests.
> > When q35 is detected, we will remove the ahci controller
> > with the hard disks.  In the libxl config, cdrom devices
> > are put on a seperate ahci controller. This allows for 6 cdrom
> > devices to be added, and 6 qemu hard disks.
> 
> Does this also work with KVM Xen emulation? If so, the QEMU manual
> should be updated accordingly in this patch since it explicitly rules
> out Q35 due to missing AHCI unplug:
> https://gitlab.com/qemu-project/qemu/-/blob/stable-8.0/docs/system/i386/xen.rst?plain=1&ref_type=heads#L51

No, it doesn't work. Those assumptions about the topology and which
disk/cdrom devices are attached to which AHCI device on which PCI bus,
are not valid in the general case. So if I boot an HVM guest thus...

 $ qemu-system-x86_64 -M q35 -m 1g -display none -serial mon:stdio \
     --accel kvm,xen-version=0x40011,kernel-irqchip=split \
     -drive file=${GUEST_IMAGE},if=xen \
     -drive file=${GUEST_IMAGE},file.locking=off,if=ide

... it still sees the AHCI disk when it boots:

[root@localhost ~]# cat /proc/partitions 
major minor  #blocks  name

 202        0   20971520 xvda
 202        1   18874368 xvda1
 202        2    2096128 xvda2
   8        0   20971520 sda
   8        1   18874368 sda1
   8        2    2096128 sda2
  11        0    1048575 sr0
[root@localhost ~]# lspci
00:00.0 Host bridge: Intel Corporation 82G33/G31/P35/P31 Express DRAM Controller
00:01.0 Unassigned class [ff80]: XenSource, Inc. Xen Platform Device (rev 01)
00:02.0 VGA compatible controller: Device 1234:1111 (rev 02)
00:1f.0 ISA bridge: Intel Corporation 82801IB (ICH9) LPC Interface Controller (rev 02)
00:1f.2 SATA controller: Intel Corporation 82801IR/IO/IH (ICH9R/DO/DH) 6 port SATA Controller [AHCI mode] (rev 02)
00:1f.3 SMBus: Intel Corporation 82801I (ICH9 Family) SMBus Controller (rev 02)

We probably do need to iterate over the children of the PCI device and
selectively destroy them. Which can be the *same* for IDE and AHCI.
Patch below.

It would be slightly more natural to do ide_dev_unplug() with an
'if (!idedev) return;' but I've done it this way to keep the
indentation the same, and thus highlight that it's just using the
existing blk unplug magic. I kind of hate that we *need* that magic,
shouldn't object_unparent() Just Work™ and unwire everything?
(It *doesn't*; qemu later crashes. But I think it *should*).

As I'm literally about to hit send on this, I realise I messed up the
'aux' logic. But as a proof of concept for this approach, this works OK
for both pc and q35 machines with Xen emulation tested as in the above
command line. Feel free to use it as you see fit, to which end:

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>

--- a/hw/i386/xen/xen_platform.c
+++ b/hw/i386/xen/xen_platform.c
@@ -169,37 +169,49 @@ static void pci_unplug_nics(PCIBus *bus)
  *
  * [1] https://xenbits.xen.org/gitweb/?p=xen.git;a=blob;f=docs/misc/hvm-emulated-unplug.pandoc
  */
-static void pci_xen_ide_unplug(PCIDevice *d, bool aux)
+static int ide_dev_unplug(DeviceState *dev, void *opaque)
 {
-    DeviceState *dev = DEVICE(d);
-    PCIIDEState *pci_ide;
-    int i;
     IDEDevice *idedev;
     IDEBus *idebus;
     BlockBackend *blk;
+    int unit;
 
-    pci_ide = PCI_IDE(dev);
-
-    for (i = aux ? 1 : 0; i < 4; i++) {
-        idebus = &pci_ide->bus[i / 2];
-        blk = idebus->ifs[i % 2].blk;
+    idedev = IDE_DEVICE(object_dynamic_cast(OBJECT(dev), "ide-hd"));
+    if (idedev) {
+        idebus = IDE_BUS(qdev_get_parent_bus(dev));
 
-        if (blk && idebus->ifs[i % 2].drive_kind != IDE_CD) {
-            if (!(i % 2)) {
-                idedev = idebus->master;
-            } else {
-                idedev = idebus->slave;
-            }
+        unit = (idedev == idebus->slave);
+        assert(unit || idedev == idebus->master);
 
+        blk = idebus->ifs[unit].blk;
+        if (blk) {
             blk_drain(blk);
             blk_flush(blk);
 
             blk_detach_dev(blk, DEVICE(idedev));
-            idebus->ifs[i % 2].blk = NULL;
+            idebus->ifs[unit].blk = NULL;
             idedev->conf.blk = NULL;
             monitor_remove_blk(blk);
             blk_unref(blk);
         }
+
+        object_unparent(OBJECT(dev));
+    }
+
+    return 0;
+}
+
+static void pci_xen_ide_unplug(PCIDevice *d, bool aux)
+{
+    DeviceState *dev = DEVICE(d);
+
+    if (!aux) {
+        IDEBus *idebus = IDE_BUS(qdev_get_child_bus(DEVICE(dev), "ide.0"));
+        if (idebus && idebus->master) {
+            ide_dev_unplug(DEVICE(idebus->master), NULL);
+        }
+    } else {
+        qdev_walk_children(dev, NULL, NULL, ide_dev_unplug, NULL, NULL);
     }
     pci_device_reset(d);
 }
@@ -216,6 +228,7 @@ static void unplug_disks(PCIBus *b, PCIDevice *d, void *opaque)
 
     switch (pci_get_word(d->config + PCI_CLASS_DEVICE)) {
     case PCI_CLASS_STORAGE_IDE:
+    case PCI_CLASS_STORAGE_SATA:
         pci_xen_ide_unplug(d, aux);
         break;
diff mbox series

Patch

diff --git a/hw/i386/xen/xen_platform.c b/hw/i386/xen/xen_platform.c
index 57f1d742c1..0375337222 100644
--- a/hw/i386/xen/xen_platform.c
+++ b/hw/i386/xen/xen_platform.c
@@ -34,6 +34,7 @@ 
 #include "sysemu/block-backend.h"
 #include "qemu/error-report.h"
 #include "qemu/module.h"
+#include "include/hw/i386/pc.h"
 #include "qom/object.h"
 
 #ifdef CONFIG_XEN
@@ -223,6 +224,12 @@  static void unplug_disks(PCIBus *b, PCIDevice *d, void *opaque)
         if (flags & UNPLUG_NVME_DISKS) {
             object_unparent(OBJECT(d));
         }
+        break;
+
+    case PCI_CLASS_STORAGE_SATA:
+	if (!aux) {
+            object_unparent(OBJECT(d));
+        }
 
     default:
         break;
@@ -231,7 +238,17 @@  static void unplug_disks(PCIBus *b, PCIDevice *d, void *opaque)
 
 static void pci_unplug_disks(PCIBus *bus, uint32_t flags)
 {
-    pci_for_each_device(bus, 0, unplug_disks, &flags);
+    PCIBus *q35 = find_q35();
+    if (q35) {
+        /* When q35 is detected, we will remove the ahci controller
+	 * with the hard disks.  In the libxl config, cdrom devices
+	 * are put on a seperate ahci controller. This allows for 6 cdrom
+	 * devices to be added, and 6 qemu hard disks.
+	 */
+        pci_function_for_one_bus(bus, unplug_disks, &flags);
+    } else {
+        pci_for_each_device(bus, 0, unplug_disks, &flags);
+    }
 }
 
 static void platform_fixed_ioport_writew(void *opaque, uint32_t addr, uint32_t val)
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 1cc7c89036..8eac3d751a 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -1815,6 +1815,23 @@  void pci_for_each_device_reverse(PCIBus *bus, int bus_num,
     }
 }
 
+void pci_function_for_one_bus(PCIBus *bus,
+                          void (*fn)(PCIBus *b, PCIDevice *d, void *opaque),
+                          void *opaque)
+{
+    bus = pci_find_bus_nr(bus, 0);
+
+    if (bus) {
+        PCIDevice *d;
+
+        d = bus->devices[PCI_DEVFN(4,0)];
+        if (d) {
+            fn(bus, d, opaque);
+            return;
+        }
+    }
+}
+
 void pci_for_each_device_under_bus(PCIBus *bus,
                                    pci_bus_dev_fn fn, void *opaque)
 {
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index e6d0574a29..c53e21082a 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -343,6 +343,9 @@  void pci_for_each_device_under_bus(PCIBus *bus,
 void pci_for_each_device_under_bus_reverse(PCIBus *bus,
                                            pci_bus_dev_fn fn,
                                            void *opaque);
+void pci_function_for_one_bus(PCIBus *bus,
+                         void (*fn)(PCIBus *bus, PCIDevice *d, void *opaque),
+                         void *opaque);
 void pci_for_each_bus_depth_first(PCIBus *bus, pci_bus_ret_fn begin,
                                   pci_bus_fn end, void *parent_state);
 PCIDevice *pci_get_function_0(PCIDevice *pci_dev);