diff mbox series

[PULL,v2,25/86] hw/cxl/component: Implement host bridge MMIO (8.2.5, table 142)

Message ID 20220516204913.542894-26-mst@redhat.com
State New
Headers show
Series [PULL,v2,01/86] virtio: fix feature negotiation for ACCESS_PLATFORM | expand

Commit Message

Michael S. Tsirkin May 16, 2022, 8:51 p.m. UTC
From: Ben Widawsky <ben.widawsky@intel.com>

CXL host bridges themselves may have MMIO. Since host bridges don't have
a BAR they are treated as special for MMIO.  This patch includes
i386/pc support.
Also hook up the device reset now that we have the MMIO
space in which the results are visible.

Note that we duplicate the PCI express case for the aml_build but
the implementations will diverge when the CXL specific _OSC is
introduced.

Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
Co-developed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-Id: <20220429144110.25167-24-Jonathan.Cameron@huawei.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/hw/cxl/cxl.h                | 14 ++++++
 hw/i386/acpi-build.c                | 25 ++++++++++-
 hw/i386/pc.c                        | 27 +++++++++++-
 hw/pci-bridge/pci_expander_bridge.c | 66 ++++++++++++++++++++++++++---
 4 files changed, 122 insertions(+), 10 deletions(-)

Comments

Igor Mammedov June 16, 2022, 2:45 p.m. UTC | #1
On Mon, 16 May 2022 16:51:34 -0400
"Michael S. Tsirkin" <mst@redhat.com> wrote:

> From: Ben Widawsky <ben.widawsky@intel.com>
> 
> CXL host bridges themselves may have MMIO. Since host bridges don't have
> a BAR they are treated as special for MMIO.  This patch includes
> i386/pc support.
> Also hook up the device reset now that we have have the MMIO
> space in which the results are visible.
> 
> Note that we duplicate the PCI express case for the aml_build but
> the implementations will diverge when the CXL specific _OSC is
> introduced.
> 
> Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
> Co-developed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
> Message-Id: <20220429144110.25167-24-Jonathan.Cameron@huawei.com>
> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>  include/hw/cxl/cxl.h                | 14 ++++++
>  hw/i386/acpi-build.c                | 25 ++++++++++-
>  hw/i386/pc.c                        | 27 +++++++++++-
>  hw/pci-bridge/pci_expander_bridge.c | 66 ++++++++++++++++++++++++++---
>  4 files changed, 122 insertions(+), 10 deletions(-)
> 
> diff --git a/include/hw/cxl/cxl.h b/include/hw/cxl/cxl.h
> index 31af92fd5e..8d1a7245d0 100644
> --- a/include/hw/cxl/cxl.h
> +++ b/include/hw/cxl/cxl.h
> @@ -10,6 +10,7 @@
>  #ifndef CXL_H
>  #define CXL_H
>  
> +#include "hw/pci/pci_host.h"
>  #include "cxl_pci.h"
>  #include "cxl_component.h"
>  #include "cxl_device.h"
> @@ -17,8 +18,21 @@
>  #define CXL_COMPONENT_REG_BAR_IDX 0
>  #define CXL_DEVICE_REG_BAR_IDX 2
>  
> +#define CXL_WINDOW_MAX 10
> +
>  typedef struct CXLState {
>      bool is_enabled;
> +    MemoryRegion host_mr;
> +    unsigned int next_mr_idx;
>  } CXLState;
>  
> +struct CXLHost {
> +    PCIHostState parent_obj;
> +
> +    CXLComponentState cxl_cstate;
> +};
> +
> +#define TYPE_PXB_CXL_HOST "pxb-cxl-host"
> +OBJECT_DECLARE_SIMPLE_TYPE(CXLHost, PXB_CXL_HOST)
> +
>  #endif
> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> index dcf6ece3d0..2d81b0f40c 100644
> --- a/hw/i386/acpi-build.c
> +++ b/hw/i386/acpi-build.c
> @@ -28,6 +28,7 @@
>  #include "qemu/bitmap.h"
>  #include "qemu/error-report.h"
>  #include "hw/pci/pci.h"
> +#include "hw/cxl/cxl.h"
>  #include "hw/core/cpu.h"
>  #include "target/i386/cpu.h"
>  #include "hw/misc/pvpanic.h"
> @@ -1572,10 +1573,21 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
>              }
>  
>              scope = aml_scope("\\_SB");
> -            dev = aml_device("PC%.02X", bus_num);
> +
> +            if (pci_bus_is_cxl(bus)) {
> +                dev = aml_device("CL%.02X", bus_num);
> +            } else {
> +                dev = aml_device("PC%.02X", bus_num);
> +            }
>              aml_append(dev, aml_name_decl("_UID", aml_int(bus_num)));
>              aml_append(dev, aml_name_decl("_BBN", aml_int(bus_num)));
> -            if (pci_bus_is_express(bus)) {
> +            if (pci_bus_is_cxl(bus)) {
> +                aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A08")));
> +                aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03")));
> +
> +                /* Expander bridges do not have ACPI PCI Hot-plug enabled */
> +                aml_append(dev, build_q35_osc_method(true));
> +            } else if (pci_bus_is_express(bus)) {
>                  aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A08")));
>                  aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03")));
>  
> @@ -1595,6 +1607,15 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
>              aml_append(dev, aml_name_decl("_CRS", crs));
>              aml_append(scope, dev);
>              aml_append(dsdt, scope);
> +
> +            /* Handle the ranges for the PXB expanders */
> +            if (pci_bus_is_cxl(bus)) {
> +                MemoryRegion *mr = &machine->cxl_devices_state->host_mr;
> +                uint64_t base = mr->addr;
> +
> +                crs_range_insert(crs_range_set.mem_ranges, base,
> +                                 base + memory_region_size(mr) - 1);
> +            }
>          }
>      }
>  
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 45e2d6092f..03d14f6564 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -75,6 +75,7 @@
>  #include "acpi-build.h"
>  #include "hw/mem/pc-dimm.h"
>  #include "hw/mem/nvdimm.h"
> +#include "hw/cxl/cxl.h"
>  #include "qapi/error.h"
>  #include "qapi/qapi-visit-common.h"
>  #include "qapi/qapi-visit-machine.h"
> @@ -816,6 +817,7 @@ void pc_memory_init(PCMachineState *pcms,
>      MachineClass *mc = MACHINE_GET_CLASS(machine);
>      PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
>      X86MachineState *x86ms = X86_MACHINE(pcms);
> +    hwaddr cxl_base;
>  
>      assert(machine->ram_size == x86ms->below_4g_mem_size +
>                                  x86ms->above_4g_mem_size);
> @@ -905,6 +907,26 @@ void pc_memory_init(PCMachineState *pcms,
>                                      &machine->device_memory->mr);
>      }
>  
> +    if (machine->cxl_devices_state->is_enabled) {
> +        MemoryRegion *mr = &machine->cxl_devices_state->host_mr;
> +        hwaddr cxl_size = MiB;
> +
> +        if (pcmc->has_reserved_memory && machine->device_memory->base) {
> +            cxl_base = machine->device_memory->base;
> +            if (!pcmc->broken_reserved_end) {
> +                cxl_base += memory_region_size(&machine->device_memory->mr);
> +            }

While reviewing the 1Tb hole patches, I stumbled onto this.
It looks wrong to ignore the device_memory size here, as RAM is/might still be mapped there,
and the guest can try to use it as RAM and then as CXL MMIO, or the other way around.
Most likely nothing good will come of it; I suggest making it unconditional and
always putting cxl_base _after_ the actual device_memory.


> +        } else if (pcms->sgx_epc.size != 0) {
> +            cxl_base = sgx_epc_above_4g_end(&pcms->sgx_epc);
> +        } else {
> +            cxl_base = 0x100000000ULL + x86ms->above_4g_mem_size;
> +        }
> +
> +        e820_add_entry(cxl_base, cxl_size, E820_RESERVED);
> +        memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size);
> +        memory_region_add_subregion(system_memory, cxl_base, mr);
> +    }
> +
>      /* Initialize PC system firmware */
>      pc_system_firmware_init(pcms, rom_memory);
>  
> @@ -965,7 +987,10 @@ uint64_t pc_pci_hole64_start(void)
>      X86MachineState *x86ms = X86_MACHINE(pcms);
>      uint64_t hole64_start = 0;
>  
> -    if (pcmc->has_reserved_memory && ms->device_memory->base) {
> +    if (ms->cxl_devices_state->host_mr.addr) {
> +        hole64_start = ms->cxl_devices_state->host_mr.addr +
> +            memory_region_size(&ms->cxl_devices_state->host_mr);
> +    } else if (pcmc->has_reserved_memory && ms->device_memory->base) {
>          hole64_start = ms->device_memory->base;
>          if (!pcmc->broken_reserved_end) {
>              hole64_start += memory_region_size(&ms->device_memory->mr);
> diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c
> index f762eb4a6e..8fb4f2ea91 100644
> --- a/hw/pci-bridge/pci_expander_bridge.c
> +++ b/hw/pci-bridge/pci_expander_bridge.c
> @@ -61,12 +61,6 @@ DECLARE_INSTANCE_CHECKER(PXBDev, PXB_PCIE_DEV,
>  DECLARE_INSTANCE_CHECKER(PXBDev, PXB_CXL_DEV,
>                           TYPE_PXB_CXL_DEVICE)
>  
> -typedef struct CXLHost {
> -    PCIHostState parent_obj;
> -
> -    CXLComponentState cxl_cstate;
> -} CXLHost;
> -
>  struct PXBDev {
>      /*< private >*/
>      PCIDevice parent_obj;
> @@ -75,6 +69,9 @@ struct PXBDev {
>      uint8_t bus_nr;
>      uint16_t numa_node;
>      bool bypass_iommu;
> +    struct cxl_dev {
> +        CXLHost *cxl_host_bridge;
> +    } cxl;
>  };
>  
>  static PXBDev *convert_to_pxb(PCIDevice *dev)
> @@ -197,6 +194,52 @@ static const TypeInfo pxb_host_info = {
>      .class_init    = pxb_host_class_init,
>  };
>  
> +static void pxb_cxl_realize(DeviceState *dev, Error **errp)
> +{
> +    MachineState *ms = MACHINE(qdev_get_machine());
> +    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
> +    CXLHost *cxl = PXB_CXL_HOST(dev);
> +    CXLComponentState *cxl_cstate = &cxl->cxl_cstate;
> +    struct MemoryRegion *mr = &cxl_cstate->crb.component_registers;
> +    hwaddr offset;
> +
> +    cxl_component_register_block_init(OBJECT(dev), cxl_cstate,
> +                                      TYPE_PXB_CXL_HOST);
> +    sysbus_init_mmio(sbd, mr);
> +
> +    offset = memory_region_size(mr) * ms->cxl_devices_state->next_mr_idx;
> +    if (offset > memory_region_size(&ms->cxl_devices_state->host_mr)) {
> +        error_setg(errp, "Insufficient space for pxb cxl host register space");
> +        return;
> +    }
> +
> +    memory_region_add_subregion(&ms->cxl_devices_state->host_mr, offset, mr);
> +    ms->cxl_devices_state->next_mr_idx++;
> +}
> +
> +static void pxb_cxl_host_class_init(ObjectClass *class, void *data)
> +{
> +    DeviceClass *dc = DEVICE_CLASS(class);
> +    PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(class);
> +
> +    hc->root_bus_path = pxb_host_root_bus_path;
> +    dc->fw_name = "cxl";
> +    dc->realize = pxb_cxl_realize;
> +    /* Reason: Internal part of the pxb/pxb-pcie device, not usable by itself */
> +    dc->user_creatable = false;
> +}
> +
> +/*
> + * This is a device to handle the MMIO for a CXL host bridge. It does nothing
> + * else.
> + */
> +static const TypeInfo cxl_host_info = {
> +    .name          = TYPE_PXB_CXL_HOST,
> +    .parent        = TYPE_PCI_HOST_BRIDGE,
> +    .instance_size = sizeof(CXLHost),
> +    .class_init    = pxb_cxl_host_class_init,
> +};
> +
>  /*
>   * Registers the PXB bus as a child of pci host root bus.
>   */
> @@ -245,6 +288,13 @@ static int pxb_map_irq_fn(PCIDevice *pci_dev, int pin)
>  
>  static void pxb_dev_reset(DeviceState *dev)
>  {
> +    CXLHost *cxl = PXB_CXL_DEV(dev)->cxl.cxl_host_bridge;
> +    CXLComponentState *cxl_cstate = &cxl->cxl_cstate;
> +    uint32_t *reg_state = cxl_cstate->crb.cache_mem_registers;
> +    uint32_t *write_msk = cxl_cstate->crb.cache_mem_regs_write_mask;
> +
> +    cxl_component_register_init_common(reg_state, write_msk, CXL2_ROOT_PORT);
> +    ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, TARGET_COUNT, 8);
>  }
>  
>  static gint pxb_compare(gconstpointer a, gconstpointer b)
> @@ -281,12 +331,13 @@ static void pxb_dev_realize_common(PCIDevice *dev, enum BusType type,
>          dev_name = dev->qdev.id;
>      }
>  
> -    ds = qdev_new(TYPE_PXB_HOST);
> +    ds = qdev_new(type == CXL ? TYPE_PXB_CXL_HOST : TYPE_PXB_HOST);
>      if (type == PCIE) {
>          bus = pci_root_bus_new(ds, dev_name, NULL, NULL, 0, TYPE_PXB_PCIE_BUS);
>      } else if (type == CXL) {
>          bus = pci_root_bus_new(ds, dev_name, NULL, NULL, 0, TYPE_PXB_CXL_BUS);
>          bus->flags |= PCI_BUS_CXL;
> +        PXB_CXL_DEV(dev)->cxl.cxl_host_bridge = PXB_CXL_HOST(ds);
>      } else {
>          bus = pci_root_bus_new(ds, "pxb-internal", NULL, NULL, 0, TYPE_PXB_BUS);
>          bds = qdev_new("pci-bridge");
> @@ -475,6 +526,7 @@ static void pxb_register_types(void)
>      type_register_static(&pxb_pcie_bus_info);
>      type_register_static(&pxb_cxl_bus_info);
>      type_register_static(&pxb_host_info);
> +    type_register_static(&cxl_host_info);
>      type_register_static(&pxb_dev_info);
>      type_register_static(&pxb_pcie_dev_info);
>      type_register_static(&pxb_cxl_dev_info);
Igor Mammedov June 17, 2022, 11:56 a.m. UTC | #2
On Fri, 17 Jun 2022 11:51:44 +0100
Jonathan Cameron <Jonathan.Cameron@Huawei.com> wrote:

> On Thu, 16 Jun 2022 16:45:00 +0200
> Igor Mammedov <imammedo@redhat.com> wrote:
> 
> > On Mon, 16 May 2022 16:51:34 -0400
> > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> >   
> > > From: Ben Widawsky <ben.widawsky@intel.com>
> > > 
> > > CXL host bridges themselves may have MMIO. Since host bridges don't have
> > > a BAR they are treated as special for MMIO.  This patch includes
> > > i386/pc support.
> > > Also hook up the device reset now that we have have the MMIO
> > > space in which the results are visible.
> > > 
> > > Note that we duplicate the PCI express case for the aml_build but
> > > the implementations will diverge when the CXL specific _OSC is
> > > introduced.
> > > 
> > > Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
> > > Co-developed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> > > Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> > > Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
> > > Message-Id: <20220429144110.25167-24-Jonathan.Cameron@huawei.com>
> > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > ---
> > >  include/hw/cxl/cxl.h                | 14 ++++++
> > >  hw/i386/acpi-build.c                | 25 ++++++++++-
> > >  hw/i386/pc.c                        | 27 +++++++++++-
> > >  hw/pci-bridge/pci_expander_bridge.c | 66 ++++++++++++++++++++++++++---
> > >  4 files changed, 122 insertions(+), 10 deletions(-)
> > > 
> > > diff --git a/include/hw/cxl/cxl.h b/include/hw/cxl/cxl.h
> > > index 31af92fd5e..8d1a7245d0 100644
> > > --- a/include/hw/cxl/cxl.h
> > > +++ b/include/hw/cxl/cxl.h
> > > @@ -10,6 +10,7 @@
> > >  #ifndef CXL_H
> > >  #define CXL_H
> > >  
> > > +#include "hw/pci/pci_host.h"
> > >  #include "cxl_pci.h"
> > >  #include "cxl_component.h"
> > >  #include "cxl_device.h"
> > > @@ -17,8 +18,21 @@
> > >  #define CXL_COMPONENT_REG_BAR_IDX 0
> > >  #define CXL_DEVICE_REG_BAR_IDX 2
> > >  
> > > +#define CXL_WINDOW_MAX 10
> > > +
> > >  typedef struct CXLState {
> > >      bool is_enabled;
> > > +    MemoryRegion host_mr;
> > > +    unsigned int next_mr_idx;
> > >  } CXLState;
> > >  
> > > +struct CXLHost {
> > > +    PCIHostState parent_obj;
> > > +
> > > +    CXLComponentState cxl_cstate;
> > > +};
> > > +
> > > +#define TYPE_PXB_CXL_HOST "pxb-cxl-host"
> > > +OBJECT_DECLARE_SIMPLE_TYPE(CXLHost, PXB_CXL_HOST)
> > > +
> > >  #endif
> > > diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> > > index dcf6ece3d0..2d81b0f40c 100644
> > > --- a/hw/i386/acpi-build.c
> > > +++ b/hw/i386/acpi-build.c
> > > @@ -28,6 +28,7 @@
> > >  #include "qemu/bitmap.h"
> > >  #include "qemu/error-report.h"
> > >  #include "hw/pci/pci.h"
> > > +#include "hw/cxl/cxl.h"
> > >  #include "hw/core/cpu.h"
> > >  #include "target/i386/cpu.h"
> > >  #include "hw/misc/pvpanic.h"
> > > @@ -1572,10 +1573,21 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
> > >              }
> > >  
> > >              scope = aml_scope("\\_SB");
> > > -            dev = aml_device("PC%.02X", bus_num);
> > > +
> > > +            if (pci_bus_is_cxl(bus)) {
> > > +                dev = aml_device("CL%.02X", bus_num);
> > > +            } else {
> > > +                dev = aml_device("PC%.02X", bus_num);
> > > +            }
> > >              aml_append(dev, aml_name_decl("_UID", aml_int(bus_num)));
> > >              aml_append(dev, aml_name_decl("_BBN", aml_int(bus_num)));
> > > -            if (pci_bus_is_express(bus)) {
> > > +            if (pci_bus_is_cxl(bus)) {
> > > +                aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A08")));
> > > +                aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03")));
> > > +
> > > +                /* Expander bridges do not have ACPI PCI Hot-plug enabled */
> > > +                aml_append(dev, build_q35_osc_method(true));
> > > +            } else if (pci_bus_is_express(bus)) {
> > >                  aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A08")));
> > >                  aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03")));
> > >  
> > > @@ -1595,6 +1607,15 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
> > >              aml_append(dev, aml_name_decl("_CRS", crs));
> > >              aml_append(scope, dev);
> > >              aml_append(dsdt, scope);
> > > +
> > > +            /* Handle the ranges for the PXB expanders */
> > > +            if (pci_bus_is_cxl(bus)) {
> > > +                MemoryRegion *mr = &machine->cxl_devices_state->host_mr;
> > > +                uint64_t base = mr->addr;
> > > +
> > > +                crs_range_insert(crs_range_set.mem_ranges, base,
> > > +                                 base + memory_region_size(mr) - 1);
> > > +            }
> > >          }
> > >      }
> > >  
> > > diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> > > index 45e2d6092f..03d14f6564 100644
> > > --- a/hw/i386/pc.c
> > > +++ b/hw/i386/pc.c
> > > @@ -75,6 +75,7 @@
> > >  #include "acpi-build.h"
> > >  #include "hw/mem/pc-dimm.h"
> > >  #include "hw/mem/nvdimm.h"
> > > +#include "hw/cxl/cxl.h"
> > >  #include "qapi/error.h"
> > >  #include "qapi/qapi-visit-common.h"
> > >  #include "qapi/qapi-visit-machine.h"
> > > @@ -816,6 +817,7 @@ void pc_memory_init(PCMachineState *pcms,
> > >      MachineClass *mc = MACHINE_GET_CLASS(machine);
> > >      PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
> > >      X86MachineState *x86ms = X86_MACHINE(pcms);
> > > +    hwaddr cxl_base;
> > >  
> > >      assert(machine->ram_size == x86ms->below_4g_mem_size +
> > >                                  x86ms->above_4g_mem_size);
> > > @@ -905,6 +907,26 @@ void pc_memory_init(PCMachineState *pcms,
> > >                                      &machine->device_memory->mr);
> > >      }
> > >  
> > > +    if (machine->cxl_devices_state->is_enabled) {
> > > +        MemoryRegion *mr = &machine->cxl_devices_state->host_mr;
> > > +        hwaddr cxl_size = MiB;
> > > +
> > > +        if (pcmc->has_reserved_memory && machine->device_memory->base) {
> > > +            cxl_base = machine->device_memory->base;
> > > +            if (!pcmc->broken_reserved_end) {
> > > +                cxl_base += memory_region_size(&machine->device_memory->mr);
> > > +            }    
> > 
> > while reviewing 1Tb hole patches, I've stumbled onto this
> > it looks wrong to ignore device_memory size here as RAM is/might still be mapped there
> > and guest can try to use it as RAM and then as CXL MMIO or other way around.
> > Most likely nothing good will come out of it, suggest make it unconditional and
> > always put cxl_base _after_ actual device_memory  
> 
> Ah. I didn't fully understand the broken_reserved_end handling.
> 
> Just to check I understand correctly, are you suggesting:
> 
>         if (pcmc->has_reserved_memory && machine->device_memory->base) {
>             cxl_base = machine->device_memory->base + 
>                 memory_region_size(&machine->device_memory->mr);
>         } else if (pcms->sgx_epc.size != 0) {
> ...

Yep, something like this.
(The fix should be ready and go into qemu-7.1, as the current code regresses access to RAM
and the fix would change the memory layout.)

> ?
> 
> Thanks,
> 
> Jonathan
> 
> 
> 
> > 
> >   
> > > +        } else if (pcms->sgx_epc.size != 0) {
> > > +            cxl_base = sgx_epc_above_4g_end(&pcms->sgx_epc);
> > > +        } else {
> > > +            cxl_base = 0x100000000ULL + x86ms->above_4g_mem_size;
> > > +        }
> > > +
> > > +        e820_add_entry(cxl_base, cxl_size, E820_RESERVED);
> > > +        memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size);
> > > +        memory_region_add_subregion(system_memory, cxl_base, mr);
> > > +    }
> > > +
> > >      /* Initialize PC system firmware */
> > >      pc_system_firmware_init(pcms, rom_memory);
> > >  
> > > @@ -965,7 +987,10 @@ uint64_t pc_pci_hole64_start(void)
> > >      X86MachineState *x86ms = X86_MACHINE(pcms);
> > >      uint64_t hole64_start = 0;
> > >  
> > > -    if (pcmc->has_reserved_memory && ms->device_memory->base) {
> > > +    if (ms->cxl_devices_state->host_mr.addr) {
> > > +        hole64_start = ms->cxl_devices_state->host_mr.addr +
> > > +            memory_region_size(&ms->cxl_devices_state->host_mr);
> > > +    } else if (pcmc->has_reserved_memory && ms->device_memory->base) {
> > >          hole64_start = ms->device_memory->base;
> > >          if (!pcmc->broken_reserved_end) {
> > >              hole64_start += memory_region_size(&ms->device_memory->mr);
> > > diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c
> > > index f762eb4a6e..8fb4f2ea91 100644
> > > --- a/hw/pci-bridge/pci_expander_bridge.c
> > > +++ b/hw/pci-bridge/pci_expander_bridge.c
> > > @@ -61,12 +61,6 @@ DECLARE_INSTANCE_CHECKER(PXBDev, PXB_PCIE_DEV,
> > >  DECLARE_INSTANCE_CHECKER(PXBDev, PXB_CXL_DEV,
> > >                           TYPE_PXB_CXL_DEVICE)
> > >  
> > > -typedef struct CXLHost {
> > > -    PCIHostState parent_obj;
> > > -
> > > -    CXLComponentState cxl_cstate;
> > > -} CXLHost;
> > > -
> > >  struct PXBDev {
> > >      /*< private >*/
> > >      PCIDevice parent_obj;
> > > @@ -75,6 +69,9 @@ struct PXBDev {
> > >      uint8_t bus_nr;
> > >      uint16_t numa_node;
> > >      bool bypass_iommu;
> > > +    struct cxl_dev {
> > > +        CXLHost *cxl_host_bridge;
> > > +    } cxl;
> > >  };
> > >  
> > >  static PXBDev *convert_to_pxb(PCIDevice *dev)
> > > @@ -197,6 +194,52 @@ static const TypeInfo pxb_host_info = {
> > >      .class_init    = pxb_host_class_init,
> > >  };
> > >  
> > > +static void pxb_cxl_realize(DeviceState *dev, Error **errp)
> > > +{
> > > +    MachineState *ms = MACHINE(qdev_get_machine());
> > > +    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
> > > +    CXLHost *cxl = PXB_CXL_HOST(dev);
> > > +    CXLComponentState *cxl_cstate = &cxl->cxl_cstate;
> > > +    struct MemoryRegion *mr = &cxl_cstate->crb.component_registers;
> > > +    hwaddr offset;
> > > +
> > > +    cxl_component_register_block_init(OBJECT(dev), cxl_cstate,
> > > +                                      TYPE_PXB_CXL_HOST);
> > > +    sysbus_init_mmio(sbd, mr);
> > > +
> > > +    offset = memory_region_size(mr) * ms->cxl_devices_state->next_mr_idx;
> > > +    if (offset > memory_region_size(&ms->cxl_devices_state->host_mr)) {
> > > +        error_setg(errp, "Insufficient space for pxb cxl host register space");
> > > +        return;
> > > +    }
> > > +
> > > +    memory_region_add_subregion(&ms->cxl_devices_state->host_mr, offset, mr);
> > > +    ms->cxl_devices_state->next_mr_idx++;
> > > +}
> > > +
> > > +static void pxb_cxl_host_class_init(ObjectClass *class, void *data)
> > > +{
> > > +    DeviceClass *dc = DEVICE_CLASS(class);
> > > +    PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(class);
> > > +
> > > +    hc->root_bus_path = pxb_host_root_bus_path;
> > > +    dc->fw_name = "cxl";
> > > +    dc->realize = pxb_cxl_realize;
> > > +    /* Reason: Internal part of the pxb/pxb-pcie device, not usable by itself */
> > > +    dc->user_creatable = false;
> > > +}
> > > +
> > > +/*
> > > + * This is a device to handle the MMIO for a CXL host bridge. It does nothing
> > > + * else.
> > > + */
> > > +static const TypeInfo cxl_host_info = {
> > > +    .name          = TYPE_PXB_CXL_HOST,
> > > +    .parent        = TYPE_PCI_HOST_BRIDGE,
> > > +    .instance_size = sizeof(CXLHost),
> > > +    .class_init    = pxb_cxl_host_class_init,
> > > +};
> > > +
> > >  /*
> > >   * Registers the PXB bus as a child of pci host root bus.
> > >   */
> > > @@ -245,6 +288,13 @@ static int pxb_map_irq_fn(PCIDevice *pci_dev, int pin)
> > >  
> > >  static void pxb_dev_reset(DeviceState *dev)
> > >  {
> > > +    CXLHost *cxl = PXB_CXL_DEV(dev)->cxl.cxl_host_bridge;
> > > +    CXLComponentState *cxl_cstate = &cxl->cxl_cstate;
> > > +    uint32_t *reg_state = cxl_cstate->crb.cache_mem_registers;
> > > +    uint32_t *write_msk = cxl_cstate->crb.cache_mem_regs_write_mask;
> > > +
> > > +    cxl_component_register_init_common(reg_state, write_msk, CXL2_ROOT_PORT);
> > > +    ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, TARGET_COUNT, 8);
> > >  }
> > >  
> > >  static gint pxb_compare(gconstpointer a, gconstpointer b)
> > > @@ -281,12 +331,13 @@ static void pxb_dev_realize_common(PCIDevice *dev, enum BusType type,
> > >          dev_name = dev->qdev.id;
> > >      }
> > >  
> > > -    ds = qdev_new(TYPE_PXB_HOST);
> > > +    ds = qdev_new(type == CXL ? TYPE_PXB_CXL_HOST : TYPE_PXB_HOST);
> > >      if (type == PCIE) {
> > >          bus = pci_root_bus_new(ds, dev_name, NULL, NULL, 0, TYPE_PXB_PCIE_BUS);
> > >      } else if (type == CXL) {
> > >          bus = pci_root_bus_new(ds, dev_name, NULL, NULL, 0, TYPE_PXB_CXL_BUS);
> > >          bus->flags |= PCI_BUS_CXL;
> > > +        PXB_CXL_DEV(dev)->cxl.cxl_host_bridge = PXB_CXL_HOST(ds);
> > >      } else {
> > >          bus = pci_root_bus_new(ds, "pxb-internal", NULL, NULL, 0, TYPE_PXB_BUS);
> > >          bds = qdev_new("pci-bridge");
> > > @@ -475,6 +526,7 @@ static void pxb_register_types(void)
> > >      type_register_static(&pxb_pcie_bus_info);
> > >      type_register_static(&pxb_cxl_bus_info);
> > >      type_register_static(&pxb_host_info);
> > > +    type_register_static(&cxl_host_info);
> > >      type_register_static(&pxb_dev_info);
> > >      type_register_static(&pxb_pcie_dev_info);
> > >      type_register_static(&pxb_cxl_dev_info);    
> >   
>
diff mbox series

Patch

diff --git a/include/hw/cxl/cxl.h b/include/hw/cxl/cxl.h
index 31af92fd5e..8d1a7245d0 100644
--- a/include/hw/cxl/cxl.h
+++ b/include/hw/cxl/cxl.h
@@ -10,6 +10,7 @@ 
 #ifndef CXL_H
 #define CXL_H
 
+#include "hw/pci/pci_host.h"
 #include "cxl_pci.h"
 #include "cxl_component.h"
 #include "cxl_device.h"
@@ -17,8 +18,21 @@ 
 #define CXL_COMPONENT_REG_BAR_IDX 0
 #define CXL_DEVICE_REG_BAR_IDX 2
 
+#define CXL_WINDOW_MAX 10
+
 typedef struct CXLState {
     bool is_enabled;
+    MemoryRegion host_mr;
+    unsigned int next_mr_idx;
 } CXLState;
 
+struct CXLHost {
+    PCIHostState parent_obj;
+
+    CXLComponentState cxl_cstate;
+};
+
+#define TYPE_PXB_CXL_HOST "pxb-cxl-host"
+OBJECT_DECLARE_SIMPLE_TYPE(CXLHost, PXB_CXL_HOST)
+
 #endif
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index dcf6ece3d0..2d81b0f40c 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -28,6 +28,7 @@ 
 #include "qemu/bitmap.h"
 #include "qemu/error-report.h"
 #include "hw/pci/pci.h"
+#include "hw/cxl/cxl.h"
 #include "hw/core/cpu.h"
 #include "target/i386/cpu.h"
 #include "hw/misc/pvpanic.h"
@@ -1572,10 +1573,21 @@  build_dsdt(GArray *table_data, BIOSLinker *linker,
             }
 
             scope = aml_scope("\\_SB");
-            dev = aml_device("PC%.02X", bus_num);
+
+            if (pci_bus_is_cxl(bus)) {
+                dev = aml_device("CL%.02X", bus_num);
+            } else {
+                dev = aml_device("PC%.02X", bus_num);
+            }
             aml_append(dev, aml_name_decl("_UID", aml_int(bus_num)));
             aml_append(dev, aml_name_decl("_BBN", aml_int(bus_num)));
-            if (pci_bus_is_express(bus)) {
+            if (pci_bus_is_cxl(bus)) {
+                aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A08")));
+                aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03")));
+
+                /* Expander bridges do not have ACPI PCI Hot-plug enabled */
+                aml_append(dev, build_q35_osc_method(true));
+            } else if (pci_bus_is_express(bus)) {
                 aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A08")));
                 aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03")));
 
@@ -1595,6 +1607,15 @@  build_dsdt(GArray *table_data, BIOSLinker *linker,
             aml_append(dev, aml_name_decl("_CRS", crs));
             aml_append(scope, dev);
             aml_append(dsdt, scope);
+
+            /* Handle the ranges for the PXB expanders */
+            if (pci_bus_is_cxl(bus)) {
+                MemoryRegion *mr = &machine->cxl_devices_state->host_mr;
+                uint64_t base = mr->addr;
+
+                crs_range_insert(crs_range_set.mem_ranges, base,
+                                 base + memory_region_size(mr) - 1);
+            }
         }
     }
 
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 45e2d6092f..03d14f6564 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -75,6 +75,7 @@ 
 #include "acpi-build.h"
 #include "hw/mem/pc-dimm.h"
 #include "hw/mem/nvdimm.h"
+#include "hw/cxl/cxl.h"
 #include "qapi/error.h"
 #include "qapi/qapi-visit-common.h"
 #include "qapi/qapi-visit-machine.h"
@@ -816,6 +817,7 @@  void pc_memory_init(PCMachineState *pcms,
     MachineClass *mc = MACHINE_GET_CLASS(machine);
     PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
     X86MachineState *x86ms = X86_MACHINE(pcms);
+    hwaddr cxl_base;
 
     assert(machine->ram_size == x86ms->below_4g_mem_size +
                                 x86ms->above_4g_mem_size);
@@ -905,6 +907,26 @@  void pc_memory_init(PCMachineState *pcms,
                                     &machine->device_memory->mr);
     }
 
+    if (machine->cxl_devices_state->is_enabled) {
+        MemoryRegion *mr = &machine->cxl_devices_state->host_mr;
+        hwaddr cxl_size = MiB;
+
+        if (pcmc->has_reserved_memory && machine->device_memory->base) {
+            cxl_base = machine->device_memory->base;
+            if (!pcmc->broken_reserved_end) {
+                cxl_base += memory_region_size(&machine->device_memory->mr);
+            }
+        } else if (pcms->sgx_epc.size != 0) {
+            cxl_base = sgx_epc_above_4g_end(&pcms->sgx_epc);
+        } else {
+            cxl_base = 0x100000000ULL + x86ms->above_4g_mem_size;
+        }
+
+        e820_add_entry(cxl_base, cxl_size, E820_RESERVED);
+        memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size);
+        memory_region_add_subregion(system_memory, cxl_base, mr);
+    }
+
     /* Initialize PC system firmware */
     pc_system_firmware_init(pcms, rom_memory);
 
@@ -965,7 +987,10 @@  uint64_t pc_pci_hole64_start(void)
     X86MachineState *x86ms = X86_MACHINE(pcms);
     uint64_t hole64_start = 0;
 
-    if (pcmc->has_reserved_memory && ms->device_memory->base) {
+    if (ms->cxl_devices_state->host_mr.addr) {
+        hole64_start = ms->cxl_devices_state->host_mr.addr +
+            memory_region_size(&ms->cxl_devices_state->host_mr);
+    } else if (pcmc->has_reserved_memory && ms->device_memory->base) {
         hole64_start = ms->device_memory->base;
         if (!pcmc->broken_reserved_end) {
             hole64_start += memory_region_size(&ms->device_memory->mr);
diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c
index f762eb4a6e..8fb4f2ea91 100644
--- a/hw/pci-bridge/pci_expander_bridge.c
+++ b/hw/pci-bridge/pci_expander_bridge.c
@@ -61,12 +61,6 @@  DECLARE_INSTANCE_CHECKER(PXBDev, PXB_PCIE_DEV,
 DECLARE_INSTANCE_CHECKER(PXBDev, PXB_CXL_DEV,
                          TYPE_PXB_CXL_DEVICE)
 
-typedef struct CXLHost {
-    PCIHostState parent_obj;
-
-    CXLComponentState cxl_cstate;
-} CXLHost;
-
 struct PXBDev {
     /*< private >*/
     PCIDevice parent_obj;
@@ -75,6 +69,9 @@  struct PXBDev {
     uint8_t bus_nr;
     uint16_t numa_node;
     bool bypass_iommu;
+    struct cxl_dev {
+        CXLHost *cxl_host_bridge;
+    } cxl;
 };
 
 static PXBDev *convert_to_pxb(PCIDevice *dev)
@@ -197,6 +194,52 @@  static const TypeInfo pxb_host_info = {
     .class_init    = pxb_host_class_init,
 };
 
+/* Map this bridge's component registers into the next free host_mr slot. */
+static void pxb_cxl_realize(DeviceState *dev, Error **errp)
+{
+    MachineState *ms = MACHINE(qdev_get_machine());
+    MemoryRegion *host_mr = &ms->cxl_devices_state->host_mr;
+    CXLComponentState *cxl_cstate = &PXB_CXL_HOST(dev)->cxl_cstate;
+    struct MemoryRegion *mr = &cxl_cstate->crb.component_registers;
+    hwaddr offset;
+
+    cxl_component_register_block_init(OBJECT(dev), cxl_cstate,
+                                      TYPE_PXB_CXL_HOST);
+    sysbus_init_mmio(SYS_BUS_DEVICE(dev), mr);
+
+    /* The block's end, not just its start, must lie inside host_mr. */
+    offset = memory_region_size(mr) * ms->cxl_devices_state->next_mr_idx;
+    if (offset + memory_region_size(mr) > memory_region_size(host_mr)) {
+        error_setg(errp, "Insufficient space for pxb cxl host register space");
+        return;
+    }
+    memory_region_add_subregion(host_mr, offset, mr);
+    ms->cxl_devices_state->next_mr_idx++;
+}
+
+static void pxb_cxl_host_class_init(ObjectClass *class, void *data)
+{
+    PCIHostBridgeClass *host_class = PCI_HOST_BRIDGE_CLASS(class);
+    DeviceClass *dev_class = DEVICE_CLASS(class);
+
+    /* Internal helper device: not meant to be created directly by users. */
+    dev_class->user_creatable = false;
+    dev_class->realize = pxb_cxl_realize;
+    dev_class->fw_name = "cxl";
+    host_class->root_bus_path = pxb_host_root_bus_path;
+}
+
+/*
+ * Host bridge device type whose only job is to handle the MMIO of a CXL host
+ * bridge.
+ */
+static const TypeInfo cxl_host_info = {
+    .parent        = TYPE_PCI_HOST_BRIDGE,
+    .name          = TYPE_PXB_CXL_HOST,
+    .class_init    = pxb_cxl_host_class_init,
+    .instance_size = sizeof(CXLHost),
+};
+
 /*
  * Registers the PXB bus as a child of pci host root bus.
  */
@@ -245,6 +288,13 @@  static int pxb_map_irq_fn(PCIDevice *pci_dev, int pin)
 
 static void pxb_dev_reset(DeviceState *dev)
 {
+    CXLHost *host = PXB_CXL_DEV(dev)->cxl.cxl_host_bridge;
+    uint32_t *regs = host->cxl_cstate.crb.cache_mem_registers;
+    uint32_t *wmask = host->cxl_cstate.crb.cache_mem_regs_write_mask;
+
+    /* Re-run the common register init, then set HDM TARGET_COUNT to 8. */
+    cxl_component_register_init_common(regs, wmask, CXL2_ROOT_PORT);
+    ARRAY_FIELD_DP32(regs, CXL_HDM_DECODER_CAPABILITY, TARGET_COUNT, 8);
 }
 
 static gint pxb_compare(gconstpointer a, gconstpointer b)
@@ -281,12 +331,13 @@  static void pxb_dev_realize_common(PCIDevice *dev, enum BusType type,
         dev_name = dev->qdev.id;
     }
 
-    ds = qdev_new(TYPE_PXB_HOST);
+    ds = qdev_new(type == CXL ? TYPE_PXB_CXL_HOST : TYPE_PXB_HOST);
     if (type == PCIE) {
         bus = pci_root_bus_new(ds, dev_name, NULL, NULL, 0, TYPE_PXB_PCIE_BUS);
     } else if (type == CXL) {
         bus = pci_root_bus_new(ds, dev_name, NULL, NULL, 0, TYPE_PXB_CXL_BUS);
         bus->flags |= PCI_BUS_CXL;
+        PXB_CXL_DEV(dev)->cxl.cxl_host_bridge = PXB_CXL_HOST(ds);
     } else {
         bus = pci_root_bus_new(ds, "pxb-internal", NULL, NULL, 0, TYPE_PXB_BUS);
         bds = qdev_new("pci-bridge");
@@ -475,6 +526,7 @@  static void pxb_register_types(void)
     type_register_static(&pxb_pcie_bus_info);
     type_register_static(&pxb_cxl_bus_info);
     type_register_static(&pxb_host_info);
+    type_register_static(&cxl_host_info);
     type_register_static(&pxb_dev_info);
     type_register_static(&pxb_pcie_dev_info);
     type_register_static(&pxb_cxl_dev_info);