Message ID | 1386282785-466-12-git-send-email-mdroth@linux.vnet.ibm.com |
---|---|
State | New |
Headers | show |
On 12/06/2013 09:33 AM, Michael Roth wrote: > From: Mike Day <ncmike@ncultra.org> > > This enables hotplug for PHB bridges. Upon hotplug we generate the > OF-nodes required by PAPR specification and IEEE 1275-1994 > "PCI Bus Binding to Open Firmware" for the device. > > We associate the corresponding FDT for these nodes with the DrcEntry > corresponding to the slot, which will be fetched via > ibm,configure-connector RTAS calls by the guest as described by PAPR > specification. The FDT is cleaned up in the case of unplug. > > Amongst the required OF-node properties for each device are the "reg" > and "assigned-addresses" properties which describe the BAR-assignments > for IO/MEM/ROM regions. To handle these assignments we scan the address > space associated with each region for a contiguous range of appropriate > size based on PCI specification and encode these in accordance with > Open Firmware PCI Bus Binding spec. > > These assignments will be used by the guest when the rpaphp hotplug > module is used, but may be re-assigned by guests for cases where we > rely on bus rescan. > > Signed-off-by: Mike Day <ncmike@ncultra.org> > Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com> > --- > hw/ppc/spapr_pci.c | 375 ++++++++++++++++++++++++++++++++++++++++++++++-- > include/hw/ppc/spapr.h | 1 + > 2 files changed, 368 insertions(+), 8 deletions(-) > > diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c > index 6e7ee31..9b4f829 100644 > --- a/hw/ppc/spapr_pci.c > +++ b/hw/ppc/spapr_pci.c > @@ -56,6 +56,17 @@ > #define RTAS_TYPE_MSI 1 > #define RTAS_TYPE_MSIX 2 > > +#define FDT_MAX_SIZE 0x10000 > +#define _FDT(exp) \ > + do { \ > + int ret = (exp); \ > + if (ret < 0) { \ > + return ret; \ > + } \ > + } while (0) > + > +static void spapr_drc_state_reset(DrcEntry *drc_entry); > + > static sPAPRPHBState *find_phb(sPAPREnvironment *spapr, uint64_t buid) > { > sPAPRPHBState *sphb; > @@ -448,6 +459,22 @@ static void rtas_set_indicator(PowerPCCPU *cpu, sPAPREnvironment *spapr, > /* encode the new value into the correct bit field */ > shift = INDICATOR_ISOLATION_SHIFT; > mask = INDICATOR_ISOLATION_MASK; > + if (drc_entry) { > + /* transition from unisolated to isolated for a hotplug slot > + * entails completion of guest-side device unplug/cleanup, so > + * we can now safely remove the device if qemu is waiting for > + * it to be released > + */ > + if (DECODE_DRC_STATE(*pind, mask, shift) != indicator_state) { > + if (indicator_state == 0 && drc_entry->awaiting_release) { > + /* device_del has been called and host is waiting > + * for guest to release/isolate device, go ahead > + * and remove it now > + */ > + spapr_drc_state_reset(drc_entry); > + } > + } > + } > break; > case 9002: /* DR */ > shift = INDICATOR_DR_SHIFT; > @@ -776,6 +803,345 @@ static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn) > return &phb->iommu_as; > } > > +/* for 'reg'/'assigned-addresses' OF properties */ > +#define RESOURCE_CELLS_SIZE 2 > +#define RESOURCE_CELLS_ADDRESS 3 > +#define RESOURCE_CELLS_TOTAL \ > + (RESOURCE_CELLS_SIZE + RESOURCE_CELLS_ADDRESS) > + > +static void fill_resource_props(PCIDevice *d, int bus_num, > + uint32_t *reg, int *reg_size, > + uint32_t *assigned, int *assigned_size) > +{ > + uint32_t *reg_row, *assigned_row; > + uint32_t dev_id = ((bus_num << 8) | > + (PCI_SLOT(d->devfn) << 3) | PCI_FUNC(d->devfn)); > + int i, idx = 0; > + > + reg[0] = cpu_to_be32(dev_id << 8); > + > + for (i = 0; i < PCI_NUM_REGIONS; i++) { > + if (!d->io_regions[i].size) { > + continue; > + } > + reg_row = ®[(idx + 1) * RESOURCE_CELLS_TOTAL]; > + assigned_row = &assigned[idx * RESOURCE_CELLS_TOTAL]; > + reg_row[0] = cpu_to_be32((dev_id << 8) | (pci_bar(d, i) & 0xff)); > + if (d->io_regions[i].type & PCI_BASE_ADDRESS_SPACE_IO) { > + reg_row[0] |= cpu_to_be32(0x01000000); > + } else { > + reg_row[0] |= cpu_to_be32(0x02000000); > + } > + assigned_row[0] = cpu_to_be32(reg_row[0] | 0x80000000); 0x80000000 == relocatable? 0x01000000/0x02000000 - space codes? There are macros (b_n, b_ss) in this file, can you please use them? > + assigned_row[3] = reg_row[3] = cpu_to_be32(d->io_regions[i].size >> 32); > + assigned_row[4] = reg_row[4] = cpu_to_be32(d->io_regions[i].size); > + assigned_row[1] = cpu_to_be32(d->io_regions[i].addr >> 32); > + assigned_row[2] = cpu_to_be32(d->io_regions[i].addr); > + idx++; > + } > + > + *reg_size = (idx + 1) * RESOURCE_CELLS_TOTAL * sizeof(uint32_t); > + *assigned_size = idx * RESOURCE_CELLS_TOTAL * sizeof(uint32_t); > +} > + > +static hwaddr spapr_find_bar_addr(sPAPRPHBState *phb, PCIIORegion *r) This does not use @pbh at all and therefore can go to hw/pci/pci.c may be (which can be tricky though)? > +{ > + MemoryRegionSection mrs = { 0 }; > + hwaddr search_addr; > + hwaddr size = r->size; > + hwaddr addr_mask = ~(size - 1); > + hwaddr increment = size; > + hwaddr limit; > + > + if (r->type == PCI_BASE_ADDRESS_SPACE_MEMORY) { > + /* beginning portion of mmio address space for bus does not get > + * mapped into system memory, so calculate addr starting at the > + * corresponding offset into mmio as. > + */ > + search_addr = (SPAPR_PCI_MEM_WIN_BUS_OFFSET + increment) & addr_mask; > + } else { > + search_addr = increment; > + } > + limit = memory_region_size(r->address_space); > + > + do { > + mrs = memory_region_find_subregion(r->address_space, search_addr, size); > + if (mrs.mr) { > + hwaddr mr_last_addr; > + mr_last_addr = mrs.mr->addr + memory_region_size(mrs.mr) - 1; > + search_addr = (mr_last_addr + 1) & addr_mask; > + if (search_addr <= mr_last_addr) { > + search_addr += increment; > + } > + /* this memory region overlaps, unref and continue searching */ > + memory_region_unref(mrs.mr); > + } > + } while (int128_nz(mrs.size) && search_addr + size <= limit); > + > + if (search_addr + size >= limit) { > + return PCI_BAR_UNMAPPED; > + } > + > + return search_addr; > +} > + > +static int spapr_map_bars(sPAPRPHBState *phb, PCIDevice *dev) This does not use @phb, well, it uses to call spapr_find_bar_addr() but that function does not use it either. Yet another candidate to get moved to hw/pci/pci.c? If you do so, you'll get even more reviews :) > +{ > + PCIIORegion *r; > + int i, ret = -1; > + > + for (i = 0; i < PCI_NUM_REGIONS; i++) { > + uint32_t bar_address = pci_bar(dev, i); > + uint32_t bar_value; > + uint16_t cmd_value = pci_default_read_config(dev, PCI_COMMAND, 2); > + hwaddr addr; > + > + r = &dev->io_regions[i]; > + > + /* this region isn't registered */ > + if (!r->size) { > + continue; > + } > + > + /* find a hw addr we can map */ > + addr = spapr_find_bar_addr(phb, r); > + if (addr == PCI_BAR_UNMAPPED) { > + /* we can't find a free range within address space for this BAR */ > + fprintf(stderr, > + "Unable to map BAR %d, no free range available\n", i); > + return -1; > + } > + /* we can probably map this region into memory if there is not > + * a race condition with some other allocator. write the address > + * to the device BAR which will force a call to pci_update_mappings > + */ > + if (r->type & PCI_BASE_ADDRESS_SPACE_IO) { > + pci_default_write_config(dev, PCI_COMMAND, > + cmd_value | PCI_COMMAND_IO, 2); > + } else { > + pci_default_write_config(dev, PCI_COMMAND, > + cmd_value | PCI_COMMAND_MEMORY, 2); > + } > + > + bar_value = addr; > + > + if (i == PCI_ROM_SLOT) { > + bar_value |= PCI_ROM_ADDRESS_ENABLE; > + } > + /* write the new bar value */ > + pci_default_write_config(dev, bar_address, bar_value, 4); > + > + /* if this is a 64-bit BAR, we need to also write the > + * upper 32 bit value. > + */ > + if (r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) { > + bar_value = (addr >> 32) & 0xffffffffUL; > + pci_default_write_config(dev, bar_address + 4, bar_value, 4); > + } > + ret = 0; > + } > + return ret; > +} > + > +static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset, > + int phb_index) > +{ > + int slot = PCI_SLOT(dev->devfn); > + char slotname[16]; > + bool is_bridge = 1; > + DrcEntry *drc_entry, *drc_entry_slot; > + uint32_t reg[RESOURCE_CELLS_TOTAL * 8] = { 0 }; > + uint32_t assigned[RESOURCE_CELLS_TOTAL * 8] = { 0 }; > + int reg_size, assigned_size; > + > + drc_entry = spapr_phb_to_drc_entry(phb_index + SPAPR_PCI_BASE_BUID); > + g_assert(drc_entry); > + drc_entry_slot = &drc_entry->child_entries[slot]; > + > + if (pci_default_read_config(dev, PCI_HEADER_TYPE, 1) == s/1/PCI_HEADER_TYPE_BRIDGE/ > + PCI_HEADER_TYPE_NORMAL) { > + is_bridge = 0; > + } > + > + _FDT(fdt_setprop_cell(fdt, offset, "vendor-id", > + pci_default_read_config(dev, PCI_VENDOR_ID, 2))); > + _FDT(fdt_setprop_cell(fdt, offset, "device-id", > + pci_default_read_config(dev, PCI_DEVICE_ID, 2))); > + _FDT(fdt_setprop_cell(fdt, offset, "revision-id", > + pci_default_read_config(dev, PCI_REVISION_ID, 1))); > + _FDT(fdt_setprop_cell(fdt, offset, "class-code", > + pci_default_read_config(dev, PCI_CLASS_DEVICE, 2) << 8)); > + > + _FDT(fdt_setprop_cell(fdt, offset, "interrupts", > + pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1))); > + > + /* if this device is NOT a bridge */ > + if (!is_bridge) { s/!is_bridge/pci_default_read_config(dev, PCI_HEADER_TYPE, 1) == PCI_HEADER_TYPE_NORMAL/ and get rid of is_bridge? > + _FDT(fdt_setprop_cell(fdt, offset, "min-grant", > + pci_default_read_config(dev, PCI_MIN_GNT, 1))); > + _FDT(fdt_setprop_cell(fdt, offset, "max-latency", > + pci_default_read_config(dev, PCI_MAX_LAT, 1))); > + _FDT(fdt_setprop_cell(fdt, offset, "subsystem-id", > + pci_default_read_config(dev, PCI_SUBSYSTEM_ID, 2))); > + _FDT(fdt_setprop_cell(fdt, offset, "subsystem-vendor-id", > + pci_default_read_config(dev, PCI_SUBSYSTEM_VENDOR_ID, 2))); > + } > + > + _FDT(fdt_setprop_cell(fdt, offset, "cache-line-size", > + pci_default_read_config(dev, PCI_CACHE_LINE_SIZE, 1))); > + > + /* the following fdt cells are masked off the pci status register */ > + int pci_status = pci_default_read_config(dev, PCI_STATUS, 2); > + _FDT(fdt_setprop_cell(fdt, offset, "devsel-speed", > + PCI_STATUS_DEVSEL_MASK & pci_status)); > + _FDT(fdt_setprop_cell(fdt, offset, "fast-back-to-back", > + PCI_STATUS_FAST_BACK & pci_status)); > + _FDT(fdt_setprop_cell(fdt, offset, "66mhz-capable", > + PCI_STATUS_66MHZ & pci_status)); > + _FDT(fdt_setprop_cell(fdt, offset, "udf-supported", > + PCI_STATUS_UDF & pci_status)); > + > + _FDT(fdt_setprop_string(fdt, offset, "name", "pci")); > + sprintf(slotname, "Slot %d", slot + phb_index * 32); > + _FDT(fdt_setprop(fdt, offset, "ibm,loc-code", slotname, strlen(slotname))); > + _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", > + drc_entry_slot->drc_index)); > + > + _FDT(fdt_setprop_cell(fdt, offset, "#address-cells", > + RESOURCE_CELLS_ADDRESS)); > + _FDT(fdt_setprop_cell(fdt, offset, "#size-cells", > + RESOURCE_CELLS_SIZE)); > + _FDT(fdt_setprop_cell(fdt, offset, "ibm,req#msi-x", > + RESOURCE_CELLS_SIZE)); > + fill_resource_props(dev, phb_index, reg, ®_size, > + assigned, &assigned_size); > + _FDT(fdt_setprop(fdt, offset, "reg", reg, reg_size)); > + _FDT(fdt_setprop(fdt, offset, "assigned-addresses", > + assigned, assigned_size)); > + > + return 0; > +} > + > +static int spapr_device_hotplug_add(DeviceState *qdev, PCIDevice *dev) > +{ > + sPAPRPHBState *phb = SPAPR_PCI_HOST_BRIDGE(qdev); > + DrcEntry *drc_entry, *drc_entry_slot; > + ConfigureConnectorState *ccs; > + int slot = PCI_SLOT(dev->devfn); > + int offset, ret; > + void *fdt_orig, *fdt; > + char nodename[512]; > + uint32_t encoded = ENCODE_DRC_STATE(INDICATOR_ENTITY_SENSE_PRESENT, > + INDICATOR_ENTITY_SENSE_MASK, > + INDICATOR_ENTITY_SENSE_SHIFT); > + > + drc_entry = spapr_phb_to_drc_entry(phb->buid); > + g_assert(drc_entry); > + drc_entry_slot = &drc_entry->child_entries[slot]; > + > + drc_entry->state &= ~(uint32_t)INDICATOR_ENTITY_SENSE_MASK; > + drc_entry->state |= encoded; /* DR entity present */ > + drc_entry_slot->state &= ~(uint32_t)INDICATOR_ENTITY_SENSE_MASK; > + drc_entry_slot->state |= encoded; /* and the slot */ "and the slot" what? s/uint32_t encoded/const uint32_t present/ and remove the comments? > + /* need to allocate memory region for device BARs */ > + spapr_map_bars(phb, dev); > + > + /* add OF node for pci device and required OF DT properties */ > + fdt_orig = g_malloc0(FDT_MAX_SIZE); > + offset = fdt_create(fdt_orig, FDT_MAX_SIZE); > + fdt_begin_node(fdt_orig, ""); > + fdt_end_node(fdt_orig); > + fdt_finish(fdt_orig); > + > + fdt = g_malloc0(FDT_MAX_SIZE); > + fdt_open_into(fdt_orig, fdt, FDT_MAX_SIZE); > + sprintf(nodename, "pci@%d", slot); > + offset = fdt_add_subnode(fdt, 0, nodename); > + ret = spapr_populate_pci_child_dt(dev, fdt, offset, phb->index); > + g_assert(!ret); > + g_free(fdt_orig); > + > + /* hold on to node, configure_connector will pass it to the guest later */ > + ccs = &drc_entry_slot->cc_state; > + ccs->fdt = fdt; > + ccs->offset_start = offset; > + ccs->state = CC_STATE_PENDING; > + ccs->dev = dev; > + > + return 0; > +} > + > +/* check whether guest has released/isolated device */ > +static bool spapr_drc_state_is_releasable(DrcEntry *drc_entry) > +{ > + return !DECODE_DRC_STATE(drc_entry->state, > + INDICATOR_ISOLATION_MASK, > + INDICATOR_ISOLATION_SHIFT); > +} It looks like this is the only separated function which calls DECODE_DRC_STATE, and it is used just once, and others call DECODE_DRC_STATE()/ENCODE_DRC_STATE() directly. I'd remove this function and call DECODE_DRC_STATE() directly, below in the code. > + > +/* finalize device unplug/deletion */ > +static void spapr_drc_state_reset(DrcEntry *drc_entry) > +{ > + ConfigureConnectorState *ccs = &drc_entry->cc_state; > + uint32_t sense_empty = ENCODE_DRC_STATE(INDICATOR_ENTITY_SENSE_EMPTY, > + INDICATOR_ENTITY_SENSE_MASK, > + INDICATOR_ENTITY_SENSE_SHIFT); > + > + g_free(ccs->fdt); > + ccs->fdt = NULL; > + object_unparent(OBJECT(ccs->dev)); > + ccs->dev = NULL; > + ccs->state = CC_STATE_IDLE; > + drc_entry->state &= ~INDICATOR_ENTITY_SENSE_MASK; > + drc_entry->state |= sense_empty; > + drc_entry->awaiting_release = false; > +} > + > +static void spapr_device_hotplug_remove(DeviceState *qdev, PCIDevice *dev) > +{ > + sPAPRPHBState *phb = SPAPR_PCI_HOST_BRIDGE(qdev); > + DrcEntry *drc_entry, *drc_entry_slot; > + ConfigureConnectorState *ccs; > + int slot = PCI_SLOT(dev->devfn); > + > + drc_entry = spapr_phb_to_drc_entry(phb->buid); > + g_assert(drc_entry); > + drc_entry_slot = &drc_entry->child_entries[slot]; > + ccs = &drc_entry_slot->cc_state; > + /* shouldn't be removing devices we haven't created an fdt for */ > + g_assert(ccs->state != CC_STATE_IDLE); Instead of g_assert(), would not it be better to return -1 here and then return this return code from spapr_device_hotplug() and let common PCI code handle this? Or we are absolutely sure that spapr_device_hotplug() cannot possibly fail so we are ready to kill the guest if it does? I do not know, just asking :) > + /* if the device has already been released/isolated by guest, go ahead > + * and remove it now. Otherwise, flag it as pending guest release so it > + * can be removed later > + */ > + if (spapr_drc_state_is_releasable(drc_entry_slot)) { > + spapr_drc_state_reset(drc_entry_slot); > + } else { > + if (drc_entry_slot->awaiting_release) { > + fprintf(stderr, "waiting for guest to release the device"); > + } else { > + drc_entry_slot->awaiting_release = true; > + } > + } > +} > + > +static int spapr_device_hotplug(DeviceState *qdev, PCIDevice *dev, > + PCIHotplugState state) > +{ sPAPRPHBState *phb = SPAPR_PCI_HOST_BRIDGE(qdev); > + if (state == PCI_COLDPLUG_ENABLED) { > + return 0; > + } > + > + if (state == PCI_HOTPLUG_ENABLED) { > + spapr_device_hotplug_add(qdev, dev); > + } else { > + spapr_device_hotplug_remove(qdev, dev); > + } and here s/qdev/phb/? spapr_device_hotplug_(add|remove), spapr_pci_hotplug_(add|remove)_event (from further patch(es)) do not use qdev as a DeviceState anyway, they cast it to sPAPRPHBState and use that. > + > + return 0; > +} > + > static int spapr_phb_init(SysBusDevice *s) > { > DeviceState *dev = DEVICE(s); > @@ -889,6 +1255,7 @@ static int spapr_phb_init(SysBusDevice *s) > &sphb->memspace, &sphb->iospace, > PCI_DEVFN(0, 0), PCI_NUM_PINS, TYPE_PCI_BUS); > phb->bus = bus; > + pci_bus_hotplug(phb->bus, spapr_device_hotplug, DEVICE(sphb)); > > sphb->dma_window_start = 0; > sphb->dma_window_size = 0x40000000; > @@ -1181,14 +1548,6 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, > return bus_off; > } > > -#define _FDT(exp) \ > - do { \ > - int ret = (exp); \ > - if (ret < 0) { \ > - return ret; \ > - } \ > - } while (0) > - > /* Write PHB properties */ > _FDT(fdt_setprop_string(fdt, bus_off, "device_type", "pci")); > _FDT(fdt_setprop_string(fdt, bus_off, "compatible", "IBM,Logical_PHB")); > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h > index 7c8a521..1c9b725 100644 > --- a/include/hw/ppc/spapr.h > +++ b/include/hw/ppc/spapr.h > @@ -328,6 +328,7 @@ struct DrcEntry { > void *fdt; > int fdt_offset; > uint32_t state; > + bool awaiting_release; > ConfigureConnectorState cc_state; > DrcEntry *child_entries; > }; >
Quoting Alexey Kardashevskiy (2013-12-15 22:36:32) > On 12/06/2013 09:33 AM, Michael Roth wrote: > > From: Mike Day <ncmike@ncultra.org> > > > > This enables hotplug for PHB bridges. Upon hotplug we generate the > > OF-nodes required by PAPR specification and IEEE 1275-1994 > > "PCI Bus Binding to Open Firmware" for the device. > > > > We associate the corresponding FDT for these nodes with the DrcEntry > > corresponding to the slot, which will be fetched via > > ibm,configure-connector RTAS calls by the guest as described by PAPR > > specification. The FDT is cleaned up in the case of unplug. > > > > Amongst the required OF-node properties for each device are the "reg" > > and "assigned-addresses" properties which describe the BAR-assignments > > for IO/MEM/ROM regions. To handle these assignments we scan the address > > space associated with each region for a contiguous range of appropriate > > size based on PCI specification and encode these in accordance with > > Open Firmware PCI Bus Binding spec. > > > > These assignments will be used by the guest when the rpaphp hotplug > > module is used, but may be re-assigned by guests for cases where we > > rely on bus rescan. > > > > Signed-off-by: Mike Day <ncmike@ncultra.org> > > Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com> > > --- > > hw/ppc/spapr_pci.c | 375 ++++++++++++++++++++++++++++++++++++++++++++++-- > > include/hw/ppc/spapr.h | 1 + > > 2 files changed, 368 insertions(+), 8 deletions(-) > > > > diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c > > index 6e7ee31..9b4f829 100644 > > --- a/hw/ppc/spapr_pci.c > > +++ b/hw/ppc/spapr_pci.c > > @@ -56,6 +56,17 @@ > > #define RTAS_TYPE_MSI 1 > > #define RTAS_TYPE_MSIX 2 > > > > +#define FDT_MAX_SIZE 0x10000 > > +#define _FDT(exp) \ > > + do { \ > > + int ret = (exp); \ > > + if (ret < 0) { \ > > + return ret; \ > > + } \ > > + } while (0) > > + > > +static void spapr_drc_state_reset(DrcEntry *drc_entry); > > + > > static sPAPRPHBState *find_phb(sPAPREnvironment *spapr, uint64_t buid) > > { > > sPAPRPHBState *sphb; > > @@ -448,6 +459,22 @@ static void rtas_set_indicator(PowerPCCPU *cpu, sPAPREnvironment *spapr, > > /* encode the new value into the correct bit field */ > > shift = INDICATOR_ISOLATION_SHIFT; > > mask = INDICATOR_ISOLATION_MASK; > > + if (drc_entry) { > > + /* transition from unisolated to isolated for a hotplug slot > > + * entails completion of guest-side device unplug/cleanup, so > > + * we can now safely remove the device if qemu is waiting for > > + * it to be released > > + */ > > + if (DECODE_DRC_STATE(*pind, mask, shift) != indicator_state) { > > + if (indicator_state == 0 && drc_entry->awaiting_release) { > > + /* device_del has been called and host is waiting > > + * for guest to release/isolate device, go ahead > > + * and remove it now > > + */ > > + spapr_drc_state_reset(drc_entry); > > + } > > + } > > + } > > break; > > case 9002: /* DR */ > > shift = INDICATOR_DR_SHIFT; > > @@ -776,6 +803,345 @@ static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn) > > return &phb->iommu_as; > > } > > > > +/* for 'reg'/'assigned-addresses' OF properties */ > > +#define RESOURCE_CELLS_SIZE 2 > > +#define RESOURCE_CELLS_ADDRESS 3 > > +#define RESOURCE_CELLS_TOTAL \ > > + (RESOURCE_CELLS_SIZE + RESOURCE_CELLS_ADDRESS) > > + > > +static void fill_resource_props(PCIDevice *d, int bus_num, > > + uint32_t *reg, int *reg_size, > > + uint32_t *assigned, int *assigned_size) > > +{ > > + uint32_t *reg_row, *assigned_row; > > + uint32_t dev_id = ((bus_num << 8) | > > + (PCI_SLOT(d->devfn) << 3) | PCI_FUNC(d->devfn)); > > + int i, idx = 0; > > + > > + reg[0] = cpu_to_be32(dev_id << 8); > > + > > + for (i = 0; i < PCI_NUM_REGIONS; i++) { > > + if (!d->io_regions[i].size) { > > + continue; > > + } > > + reg_row = ®[(idx + 1) * RESOURCE_CELLS_TOTAL]; > > + assigned_row = &assigned[idx * RESOURCE_CELLS_TOTAL]; > > + reg_row[0] = cpu_to_be32((dev_id << 8) | (pci_bar(d, i) & 0xff)); > > + if (d->io_regions[i].type & PCI_BASE_ADDRESS_SPACE_IO) { > > + reg_row[0] |= cpu_to_be32(0x01000000); > > + } else { > > + reg_row[0] |= cpu_to_be32(0x02000000); > > + } > > + assigned_row[0] = cpu_to_be32(reg_row[0] | 0x80000000); > > > 0x80000000 == relocatable? 0x01000000/0x02000000 - space codes? There are > macros (b_n, b_ss) in this file, can you please use them? Ah, those look handy, thanks > > > > + assigned_row[3] = reg_row[3] = cpu_to_be32(d->io_regions[i].size >> 32); > > + assigned_row[4] = reg_row[4] = cpu_to_be32(d->io_regions[i].size); > > + assigned_row[1] = cpu_to_be32(d->io_regions[i].addr >> 32); > > + assigned_row[2] = cpu_to_be32(d->io_regions[i].addr); > > + idx++; > > + } > > + > > + *reg_size = (idx + 1) * RESOURCE_CELLS_TOTAL * sizeof(uint32_t); > > + *assigned_size = idx * RESOURCE_CELLS_TOTAL * sizeof(uint32_t); > > +} > > + > > +static hwaddr spapr_find_bar_addr(sPAPRPHBState *phb, PCIIORegion *r) > > > This does not use @pbh at all and therefore can go to hw/pci/pci.c may be > (which can be tricky though)? SPAPR_PCI_MEM_WIN_BUS_OFFSET is certainly platform specific, so that would need to be generalized. Beyond that, I don't have enough experience to know how useful this would be for other platforms, but perhaps it's general enough for others to add-to/re-use in the future. I'll take a stab at moving it to hw/pci/pci.c and Cc the appropriate maintainers. > > > > +{ > > + MemoryRegionSection mrs = { 0 }; > > + hwaddr search_addr; > > + hwaddr size = r->size; > > + hwaddr addr_mask = ~(size - 1); > > + hwaddr increment = size; > > + hwaddr limit; > > + > > + if (r->type == PCI_BASE_ADDRESS_SPACE_MEMORY) { > > + /* beginning portion of mmio address space for bus does not get > > + * mapped into system memory, so calculate addr starting at the > > + * corresponding offset into mmio as. > > + */ > > + search_addr = (SPAPR_PCI_MEM_WIN_BUS_OFFSET + increment) & addr_mask; > > + } else { > > + search_addr = increment; > > + } > > + limit = memory_region_size(r->address_space); > > + > > + do { > > + mrs = memory_region_find_subregion(r->address_space, search_addr, size); > > + if (mrs.mr) { > > + hwaddr mr_last_addr; > > + mr_last_addr = mrs.mr->addr + memory_region_size(mrs.mr) - 1; > > + search_addr = (mr_last_addr + 1) & addr_mask; > > + if (search_addr <= mr_last_addr) { > > + search_addr += increment; > > + } > > + /* this memory region overlaps, unref and continue searching */ > > + memory_region_unref(mrs.mr); > > + } > > + } while (int128_nz(mrs.size) && search_addr + size <= limit); > > + > > + if (search_addr + size >= limit) { > > + return PCI_BAR_UNMAPPED; > > + } > > + > > + return search_addr; > > +} > > + > > +static int spapr_map_bars(sPAPRPHBState *phb, PCIDevice *dev) > > This does not use @phb, well, it uses to call spapr_find_bar_addr() but > that function does not use it either. > > Yet another candidate to get moved to hw/pci/pci.c? If you do so, you'll > get even more reviews :) > > > > +{ > > + PCIIORegion *r; > > + int i, ret = -1; > > + > > + for (i = 0; i < PCI_NUM_REGIONS; i++) { > > + uint32_t bar_address = pci_bar(dev, i); > > + uint32_t bar_value; > > + uint16_t cmd_value = pci_default_read_config(dev, PCI_COMMAND, 2); > > + hwaddr addr; > > + > > + r = &dev->io_regions[i]; > > + > > + /* this region isn't registered */ > > + if (!r->size) { > > + continue; > > + } > > + > > + /* find a hw addr we can map */ > > + addr = spapr_find_bar_addr(phb, r); > > + if (addr == PCI_BAR_UNMAPPED) { > > + /* we can't find a free range within address space for this BAR */ > > + fprintf(stderr, > > + "Unable to map BAR %d, no free range available\n", i); > > + return -1; > > + } > > + /* we can probably map this region into memory if there is not > > + * a race condition with some other allocator. write the address > > + * to the device BAR which will force a call to pci_update_mappings > > + */ > > + if (r->type & PCI_BASE_ADDRESS_SPACE_IO) { > > + pci_default_write_config(dev, PCI_COMMAND, > > + cmd_value | PCI_COMMAND_IO, 2); > > + } else { > > + pci_default_write_config(dev, PCI_COMMAND, > > + cmd_value | PCI_COMMAND_MEMORY, 2); > > + } > > + > > + bar_value = addr; > > + > > + if (i == PCI_ROM_SLOT) { > > + bar_value |= PCI_ROM_ADDRESS_ENABLE; > > + } > > + /* write the new bar value */ > > + pci_default_write_config(dev, bar_address, bar_value, 4); > > + > > + /* if this is a 64-bit BAR, we need to also write the > > + * upper 32 bit value. > > + */ > > + if (r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) { > > + bar_value = (addr >> 32) & 0xffffffffUL; > > + pci_default_write_config(dev, bar_address + 4, bar_value, 4); > > + } > > + ret = 0; > > + } > > + return ret; > > +} > > + > > +static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset, > > + int phb_index) > > +{ > > + int slot = PCI_SLOT(dev->devfn); > > + char slotname[16]; > > + bool is_bridge = 1; > > + DrcEntry *drc_entry, *drc_entry_slot; > > + uint32_t reg[RESOURCE_CELLS_TOTAL * 8] = { 0 }; > > + uint32_t assigned[RESOURCE_CELLS_TOTAL * 8] = { 0 }; > > + int reg_size, assigned_size; > > + > > + drc_entry = spapr_phb_to_drc_entry(phb_index + SPAPR_PCI_BASE_BUID); > > + g_assert(drc_entry); > > + drc_entry_slot = &drc_entry->child_entries[slot]; > > + > > + if (pci_default_read_config(dev, PCI_HEADER_TYPE, 1) == > > > s/1/PCI_HEADER_TYPE_BRIDGE/ > > > > + PCI_HEADER_TYPE_NORMAL) { > > + is_bridge = 0; > > + } > > + > > + _FDT(fdt_setprop_cell(fdt, offset, "vendor-id", > > + pci_default_read_config(dev, PCI_VENDOR_ID, 2))); > > + _FDT(fdt_setprop_cell(fdt, offset, "device-id", > > + pci_default_read_config(dev, PCI_DEVICE_ID, 2))); > > + _FDT(fdt_setprop_cell(fdt, offset, "revision-id", > > + pci_default_read_config(dev, PCI_REVISION_ID, 1))); > > + _FDT(fdt_setprop_cell(fdt, offset, "class-code", > > + pci_default_read_config(dev, PCI_CLASS_DEVICE, 2) << 8)); > > + > > + _FDT(fdt_setprop_cell(fdt, offset, "interrupts", > > + pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1))); > > + > > + /* if this device is NOT a bridge */ > > + if (!is_bridge) { > > > s/!is_bridge/pci_default_read_config(dev, PCI_HEADER_TYPE, 1) == > PCI_HEADER_TYPE_NORMAL/ > > and get rid of is_bridge? > > > > > + _FDT(fdt_setprop_cell(fdt, offset, "min-grant", > > + pci_default_read_config(dev, PCI_MIN_GNT, 1))); > > + _FDT(fdt_setprop_cell(fdt, offset, "max-latency", > > + pci_default_read_config(dev, PCI_MAX_LAT, 1))); > > + _FDT(fdt_setprop_cell(fdt, offset, "subsystem-id", > > + pci_default_read_config(dev, PCI_SUBSYSTEM_ID, 2))); > > + _FDT(fdt_setprop_cell(fdt, offset, "subsystem-vendor-id", > > + pci_default_read_config(dev, PCI_SUBSYSTEM_VENDOR_ID, 2))); > > + } > > + > > + _FDT(fdt_setprop_cell(fdt, offset, "cache-line-size", > > + pci_default_read_config(dev, PCI_CACHE_LINE_SIZE, 1))); > > + > > + /* the following fdt cells are masked off the pci status register */ > > + int pci_status = pci_default_read_config(dev, PCI_STATUS, 2); > > + _FDT(fdt_setprop_cell(fdt, offset, "devsel-speed", > > + PCI_STATUS_DEVSEL_MASK & pci_status)); > > + _FDT(fdt_setprop_cell(fdt, offset, "fast-back-to-back", > > + PCI_STATUS_FAST_BACK & pci_status)); > > + _FDT(fdt_setprop_cell(fdt, offset, "66mhz-capable", > > + PCI_STATUS_66MHZ & pci_status)); > > + _FDT(fdt_setprop_cell(fdt, offset, "udf-supported", > > + PCI_STATUS_UDF & pci_status)); > > + > > + _FDT(fdt_setprop_string(fdt, offset, "name", "pci")); > > + sprintf(slotname, "Slot %d", slot + phb_index * 32); > > + _FDT(fdt_setprop(fdt, offset, "ibm,loc-code", slotname, strlen(slotname))); > > + _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", > > + drc_entry_slot->drc_index)); > > + > > + _FDT(fdt_setprop_cell(fdt, offset, "#address-cells", > > + RESOURCE_CELLS_ADDRESS)); > > + _FDT(fdt_setprop_cell(fdt, offset, "#size-cells", > > + RESOURCE_CELLS_SIZE)); > > + _FDT(fdt_setprop_cell(fdt, offset, "ibm,req#msi-x", > > + RESOURCE_CELLS_SIZE)); > > + fill_resource_props(dev, phb_index, reg, ®_size, > > + assigned, &assigned_size); > > + _FDT(fdt_setprop(fdt, offset, "reg", reg, reg_size)); > > + _FDT(fdt_setprop(fdt, offset, "assigned-addresses", > > + assigned, assigned_size)); > > + > > + return 0; > > +} > > + > > +static int spapr_device_hotplug_add(DeviceState *qdev, PCIDevice *dev) > > +{ > > + sPAPRPHBState *phb = SPAPR_PCI_HOST_BRIDGE(qdev); > > + DrcEntry *drc_entry, *drc_entry_slot; > > + ConfigureConnectorState *ccs; > > + int slot = PCI_SLOT(dev->devfn); > > + int offset, ret; > > + void *fdt_orig, *fdt; > > + char nodename[512]; > > + uint32_t encoded = ENCODE_DRC_STATE(INDICATOR_ENTITY_SENSE_PRESENT, > > + INDICATOR_ENTITY_SENSE_MASK, > > + INDICATOR_ENTITY_SENSE_SHIFT); > > + > > + drc_entry = spapr_phb_to_drc_entry(phb->buid); > > + g_assert(drc_entry); > > + drc_entry_slot = &drc_entry->child_entries[slot]; > > + > > + drc_entry->state &= ~(uint32_t)INDICATOR_ENTITY_SENSE_MASK; > > + drc_entry->state |= encoded; /* DR entity present */ > > + drc_entry_slot->state &= ~(uint32_t)INDICATOR_ENTITY_SENSE_MASK; > > + drc_entry_slot->state |= encoded; /* and the slot */ > > > "and the slot" what? > s/uint32_t encoded/const uint32_t present/ and remove the comments? > > > > + /* need to allocate memory region for device BARs */ > > + spapr_map_bars(phb, dev); > > + > > + /* add OF node for pci device and required OF DT properties */ > > + fdt_orig = g_malloc0(FDT_MAX_SIZE); > > + offset = fdt_create(fdt_orig, FDT_MAX_SIZE); > > + fdt_begin_node(fdt_orig, ""); > > + fdt_end_node(fdt_orig); > > + fdt_finish(fdt_orig); > > + > > + fdt = g_malloc0(FDT_MAX_SIZE); > > + fdt_open_into(fdt_orig, fdt, FDT_MAX_SIZE); > > + sprintf(nodename, "pci@%d", slot); > > + offset = fdt_add_subnode(fdt, 0, nodename); > > + ret = spapr_populate_pci_child_dt(dev, fdt, offset, phb->index); > > + g_assert(!ret); > > + g_free(fdt_orig); > > + > > + /* hold on to node, configure_connector will pass it to the guest later */ > > + ccs = &drc_entry_slot->cc_state; > > + ccs->fdt = fdt; > > + ccs->offset_start = offset; > > + ccs->state = CC_STATE_PENDING; > > + ccs->dev = dev; > > + > > + return 0; > > +} > > + > > +/* check whether guest has released/isolated device */ > > +static bool spapr_drc_state_is_releasable(DrcEntry *drc_entry) > > +{ > > + return !DECODE_DRC_STATE(drc_entry->state, > > + INDICATOR_ISOLATION_MASK, > > + INDICATOR_ISOLATION_SHIFT); > > +} > > It looks like this is the only separated function which calls > DECODE_DRC_STATE, and it is used just once, and others call > DECODE_DRC_STATE()/ENCODE_DRC_STATE() directly. I'd remove this function > and call DECODE_DRC_STATE() directly, below in the code. > > > > + > > +/* finalize device unplug/deletion */ > > +static void spapr_drc_state_reset(DrcEntry *drc_entry) > > +{ > > + ConfigureConnectorState *ccs = &drc_entry->cc_state; > > + uint32_t sense_empty = ENCODE_DRC_STATE(INDICATOR_ENTITY_SENSE_EMPTY, > > + INDICATOR_ENTITY_SENSE_MASK, > > + INDICATOR_ENTITY_SENSE_SHIFT); > > + > > + g_free(ccs->fdt); > > + ccs->fdt = NULL; > > + object_unparent(OBJECT(ccs->dev)); > > + ccs->dev = NULL; > > + ccs->state = CC_STATE_IDLE; > > + drc_entry->state &= ~INDICATOR_ENTITY_SENSE_MASK; > > + drc_entry->state |= sense_empty; > > + drc_entry->awaiting_release = false; > > +} > > + > > +static void spapr_device_hotplug_remove(DeviceState *qdev, PCIDevice *dev) > > +{ > > + sPAPRPHBState *phb = SPAPR_PCI_HOST_BRIDGE(qdev); > > + DrcEntry *drc_entry, *drc_entry_slot; > > + ConfigureConnectorState *ccs; > > + int slot = PCI_SLOT(dev->devfn); > > + > > + drc_entry = spapr_phb_to_drc_entry(phb->buid); > > + g_assert(drc_entry); > > + drc_entry_slot = &drc_entry->child_entries[slot]; > > + ccs = &drc_entry_slot->cc_state; > > + /* shouldn't be removing devices we haven't created an fdt for */ > > + g_assert(ccs->state != CC_STATE_IDLE); > > > Instead of g_assert(), would not it be better to return -1 here and then > return this return code from spapr_device_hotplug() and let common PCI code > handle this? > > Or we are absolutely sure that spapr_device_hotplug() cannot possibly fail > so we are ready to kill the guest if it does? I do not know, just asking :) CC_STATE_IDLE describes an empty slot in the context of PCI, so we should never hit the assertion. It more of a development aid to ensure proper code handling during initial device creation. This should not be triggerable by users. > > > > + /* if the device has already been released/isolated by guest, go ahead > > + * and remove it now. Otherwise, flag it as pending guest release so it > > + * can be removed later > > + */ > > + if (spapr_drc_state_is_releasable(drc_entry_slot)) { > > + spapr_drc_state_reset(drc_entry_slot); > > + } else { > > + if (drc_entry_slot->awaiting_release) { > > + fprintf(stderr, "waiting for guest to release the device"); > > + } else { > > + drc_entry_slot->awaiting_release = true; > > + } > > + } > > +} > > + > > +static int spapr_device_hotplug(DeviceState *qdev, PCIDevice *dev, > > + PCIHotplugState state) > > +{ > > sPAPRPHBState *phb = SPAPR_PCI_HOST_BRIDGE(qdev); > > > + if (state == PCI_COLDPLUG_ENABLED) { > > + return 0; > > + } > > + > > + if (state == PCI_HOTPLUG_ENABLED) { > > + spapr_device_hotplug_add(qdev, dev); > > + } else { > > + spapr_device_hotplug_remove(qdev, dev); > > + } > > and here s/qdev/phb/? spapr_device_hotplug_(add|remove), > spapr_pci_hotplug_(add|remove)_event (from further patch(es)) do not use > qdev as a DeviceState anyway, they cast it to sPAPRPHBState and use that. > That's cleaner, will fix. > > > > + > > + return 0; > > +} > > + > > static int spapr_phb_init(SysBusDevice *s) > > { > > DeviceState *dev = DEVICE(s); > > @@ -889,6 +1255,7 @@ static int spapr_phb_init(SysBusDevice *s) > > &sphb->memspace, &sphb->iospace, > > PCI_DEVFN(0, 0), PCI_NUM_PINS, TYPE_PCI_BUS); > > phb->bus = bus; > > + pci_bus_hotplug(phb->bus, spapr_device_hotplug, DEVICE(sphb)); > > > > sphb->dma_window_start = 0; > > sphb->dma_window_size = 0x40000000; > > @@ -1181,14 +1548,6 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, > > return bus_off; > > } > > > > -#define _FDT(exp) \ > > - do { \ > > - int ret = (exp); \ > > - if (ret < 0) { \ > > - return ret; \ > > - } \ > > - } while (0) > > - > > /* Write PHB properties */ > > _FDT(fdt_setprop_string(fdt, bus_off, "device_type", "pci")); > > _FDT(fdt_setprop_string(fdt, bus_off, "compatible", "IBM,Logical_PHB")); > > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h > > index 7c8a521..1c9b725 100644 > > --- a/include/hw/ppc/spapr.h > > +++ b/include/hw/ppc/spapr.h > > @@ -328,6 +328,7 @@ struct DrcEntry { > > void *fdt; > > int fdt_offset; > > uint32_t state; > > + bool awaiting_release; > > ConfigureConnectorState cc_state; > > DrcEntry *child_entries; > > }; > > > > > -- > Alexey
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 6e7ee31..9b4f829 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -56,6 +56,17 @@ #define RTAS_TYPE_MSI 1 #define RTAS_TYPE_MSIX 2 +#define FDT_MAX_SIZE 0x10000 +#define _FDT(exp) \ + do { \ + int ret = (exp); \ + if (ret < 0) { \ + return ret; \ + } \ + } while (0) + +static void spapr_drc_state_reset(DrcEntry *drc_entry); + static sPAPRPHBState *find_phb(sPAPREnvironment *spapr, uint64_t buid) { sPAPRPHBState *sphb; @@ -448,6 +459,22 @@ static void rtas_set_indicator(PowerPCCPU *cpu, sPAPREnvironment *spapr, /* encode the new value into the correct bit field */ shift = INDICATOR_ISOLATION_SHIFT; mask = INDICATOR_ISOLATION_MASK; + if (drc_entry) { + /* transition from unisolated to isolated for a hotplug slot + * entails completion of guest-side device unplug/cleanup, so + * we can now safely remove the device if qemu is waiting for + * it to be released + */ + if (DECODE_DRC_STATE(*pind, mask, shift) != indicator_state) { + if (indicator_state == 0 && drc_entry->awaiting_release) { + /* device_del has been called and host is waiting + * for guest to release/isolate device, go ahead + * and remove it now + */ + spapr_drc_state_reset(drc_entry); + } + } + } break; case 9002: /* DR */ shift = INDICATOR_DR_SHIFT; @@ -776,6 +803,345 @@ static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn) return &phb->iommu_as; } +/* for 'reg'/'assigned-addresses' OF properties */ +#define RESOURCE_CELLS_SIZE 2 +#define RESOURCE_CELLS_ADDRESS 3 +#define RESOURCE_CELLS_TOTAL \ + (RESOURCE_CELLS_SIZE + RESOURCE_CELLS_ADDRESS) + +static void fill_resource_props(PCIDevice *d, int bus_num, + uint32_t *reg, int *reg_size, + uint32_t *assigned, int *assigned_size) +{ + uint32_t *reg_row, *assigned_row; + uint32_t dev_id = ((bus_num << 8) | + (PCI_SLOT(d->devfn) << 3) | PCI_FUNC(d->devfn)); + int i, idx = 0; + + reg[0] = cpu_to_be32(dev_id << 8); + + for (i = 0; i < PCI_NUM_REGIONS; i++) { + if (!d->io_regions[i].size) { + continue; + } + reg_row = ®[(idx + 1) * RESOURCE_CELLS_TOTAL]; + assigned_row = &assigned[idx * RESOURCE_CELLS_TOTAL]; + reg_row[0] = cpu_to_be32((dev_id << 8) | (pci_bar(d, i) & 0xff)); + if (d->io_regions[i].type & PCI_BASE_ADDRESS_SPACE_IO) { + reg_row[0] |= cpu_to_be32(0x01000000); + } else { + reg_row[0] |= cpu_to_be32(0x02000000); + } + assigned_row[0] = cpu_to_be32(reg_row[0] | 0x80000000); + assigned_row[3] = reg_row[3] = cpu_to_be32(d->io_regions[i].size >> 32); + assigned_row[4] = reg_row[4] = cpu_to_be32(d->io_regions[i].size); + assigned_row[1] = cpu_to_be32(d->io_regions[i].addr >> 32); + assigned_row[2] = cpu_to_be32(d->io_regions[i].addr); + idx++; + } + + *reg_size = (idx + 1) * RESOURCE_CELLS_TOTAL * sizeof(uint32_t); + *assigned_size = idx * RESOURCE_CELLS_TOTAL * sizeof(uint32_t); +} + +static hwaddr spapr_find_bar_addr(sPAPRPHBState *phb, PCIIORegion *r) +{ + MemoryRegionSection mrs = { 0 }; + hwaddr search_addr; + hwaddr size = r->size; + hwaddr addr_mask = ~(size - 1); + hwaddr increment = size; + hwaddr limit; + + if (r->type == PCI_BASE_ADDRESS_SPACE_MEMORY) { + /* beginning portion of mmio address space for bus does not get + * mapped into system memory, so calculate addr starting at the + * corresponding offset into mmio as. + */ + search_addr = (SPAPR_PCI_MEM_WIN_BUS_OFFSET + increment) & addr_mask; + } else { + search_addr = increment; + } + limit = memory_region_size(r->address_space); + + do { + mrs = memory_region_find_subregion(r->address_space, search_addr, size); + if (mrs.mr) { + hwaddr mr_last_addr; + mr_last_addr = mrs.mr->addr + memory_region_size(mrs.mr) - 1; + search_addr = (mr_last_addr + 1) & addr_mask; + if (search_addr <= mr_last_addr) { + search_addr += increment; + } + /* this memory region overlaps, unref and continue searching */ + memory_region_unref(mrs.mr); + } + } while (int128_nz(mrs.size) && search_addr + size <= limit); + + if (search_addr + size >= limit) { + return PCI_BAR_UNMAPPED; + } + + return search_addr; +} + +static int spapr_map_bars(sPAPRPHBState *phb, PCIDevice *dev) +{ + PCIIORegion *r; + int i, ret = -1; + + for (i = 0; i < PCI_NUM_REGIONS; i++) { + uint32_t bar_address = pci_bar(dev, i); + uint32_t bar_value; + uint16_t cmd_value = pci_default_read_config(dev, PCI_COMMAND, 2); + hwaddr addr; + + r = &dev->io_regions[i]; + + /* this region isn't registered */ + if (!r->size) { + continue; + } + + /* find a hw addr we can map */ + addr = spapr_find_bar_addr(phb, r); + if (addr == PCI_BAR_UNMAPPED) { + /* we can't find a free range within address space for this BAR */ + fprintf(stderr, + "Unable to map BAR %d, no free range available\n", i); + return -1; + } + /* we can probably map this region into memory if there is not + * a race condition with some other allocator. write the address + * to the device BAR which will force a call to pci_update_mappings + */ + if (r->type & PCI_BASE_ADDRESS_SPACE_IO) { + pci_default_write_config(dev, PCI_COMMAND, + cmd_value | PCI_COMMAND_IO, 2); + } else { + pci_default_write_config(dev, PCI_COMMAND, + cmd_value | PCI_COMMAND_MEMORY, 2); + } + + bar_value = addr; + + if (i == PCI_ROM_SLOT) { + bar_value |= PCI_ROM_ADDRESS_ENABLE; + } + /* write the new bar value */ + pci_default_write_config(dev, bar_address, bar_value, 4); + + /* if this is a 64-bit BAR, we need to also write the + * upper 32 bit value. + */ + if (r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) { + bar_value = (addr >> 32) & 0xffffffffUL; + pci_default_write_config(dev, bar_address + 4, bar_value, 4); + } + ret = 0; + } + return ret; +} + +static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset, + int phb_index) +{ + int slot = PCI_SLOT(dev->devfn); + char slotname[16]; + bool is_bridge = 1; + DrcEntry *drc_entry, *drc_entry_slot; + uint32_t reg[RESOURCE_CELLS_TOTAL * 8] = { 0 }; + uint32_t assigned[RESOURCE_CELLS_TOTAL * 8] = { 0 }; + int reg_size, assigned_size; + + drc_entry = spapr_phb_to_drc_entry(phb_index + SPAPR_PCI_BASE_BUID); + g_assert(drc_entry); + drc_entry_slot = &drc_entry->child_entries[slot]; + + if (pci_default_read_config(dev, PCI_HEADER_TYPE, 1) == + PCI_HEADER_TYPE_NORMAL) { + is_bridge = 0; + } + + _FDT(fdt_setprop_cell(fdt, offset, "vendor-id", + pci_default_read_config(dev, PCI_VENDOR_ID, 2))); + _FDT(fdt_setprop_cell(fdt, offset, "device-id", + pci_default_read_config(dev, PCI_DEVICE_ID, 2))); + _FDT(fdt_setprop_cell(fdt, offset, "revision-id", + pci_default_read_config(dev, PCI_REVISION_ID, 1))); + _FDT(fdt_setprop_cell(fdt, offset, "class-code", + pci_default_read_config(dev, PCI_CLASS_DEVICE, 2) << 8)); + + _FDT(fdt_setprop_cell(fdt, offset, "interrupts", + pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1))); + + /* if this device is NOT a bridge */ + if (!is_bridge) { + _FDT(fdt_setprop_cell(fdt, offset, "min-grant", + pci_default_read_config(dev, PCI_MIN_GNT, 1))); + _FDT(fdt_setprop_cell(fdt, offset, "max-latency", + pci_default_read_config(dev, PCI_MAX_LAT, 1))); + _FDT(fdt_setprop_cell(fdt, offset, "subsystem-id", + pci_default_read_config(dev, PCI_SUBSYSTEM_ID, 2))); + _FDT(fdt_setprop_cell(fdt, offset, "subsystem-vendor-id", + pci_default_read_config(dev, PCI_SUBSYSTEM_VENDOR_ID, 2))); + } + + _FDT(fdt_setprop_cell(fdt, offset, "cache-line-size", + pci_default_read_config(dev, PCI_CACHE_LINE_SIZE, 1))); + + /* the following fdt cells are masked off the pci status register */ + int pci_status = pci_default_read_config(dev, PCI_STATUS, 2); + _FDT(fdt_setprop_cell(fdt, offset, "devsel-speed", + PCI_STATUS_DEVSEL_MASK & pci_status)); + _FDT(fdt_setprop_cell(fdt, offset, "fast-back-to-back", + PCI_STATUS_FAST_BACK & pci_status)); + _FDT(fdt_setprop_cell(fdt, offset, "66mhz-capable", + PCI_STATUS_66MHZ & pci_status)); + _FDT(fdt_setprop_cell(fdt, offset, "udf-supported", + PCI_STATUS_UDF & pci_status)); + + _FDT(fdt_setprop_string(fdt, offset, "name", "pci")); + sprintf(slotname, "Slot %d", slot + phb_index * 32); + _FDT(fdt_setprop(fdt, offset, "ibm,loc-code", slotname, strlen(slotname))); + _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", + drc_entry_slot->drc_index)); + + _FDT(fdt_setprop_cell(fdt, offset, "#address-cells", + RESOURCE_CELLS_ADDRESS)); + _FDT(fdt_setprop_cell(fdt, offset, "#size-cells", + RESOURCE_CELLS_SIZE)); + _FDT(fdt_setprop_cell(fdt, offset, "ibm,req#msi-x", + RESOURCE_CELLS_SIZE)); + fill_resource_props(dev, phb_index, reg, ®_size, + assigned, &assigned_size); + _FDT(fdt_setprop(fdt, offset, "reg", reg, reg_size)); + _FDT(fdt_setprop(fdt, offset, "assigned-addresses", + assigned, assigned_size)); + + return 0; +} + +static int spapr_device_hotplug_add(DeviceState *qdev, PCIDevice *dev) +{ + sPAPRPHBState *phb = SPAPR_PCI_HOST_BRIDGE(qdev); + DrcEntry *drc_entry, *drc_entry_slot; + ConfigureConnectorState *ccs; + int slot = PCI_SLOT(dev->devfn); + int offset, ret; + void *fdt_orig, *fdt; + char nodename[512]; + uint32_t encoded = ENCODE_DRC_STATE(INDICATOR_ENTITY_SENSE_PRESENT, + INDICATOR_ENTITY_SENSE_MASK, + INDICATOR_ENTITY_SENSE_SHIFT); + + drc_entry = spapr_phb_to_drc_entry(phb->buid); + g_assert(drc_entry); + drc_entry_slot = &drc_entry->child_entries[slot]; + + drc_entry->state &= ~(uint32_t)INDICATOR_ENTITY_SENSE_MASK; + drc_entry->state |= encoded; /* DR entity present */ + drc_entry_slot->state &= ~(uint32_t)INDICATOR_ENTITY_SENSE_MASK; + drc_entry_slot->state |= encoded; /* and the slot */ + + /* need to allocate memory region for device BARs */ + spapr_map_bars(phb, dev); + + /* add OF node for pci device and required OF DT properties */ + fdt_orig = g_malloc0(FDT_MAX_SIZE); + offset = fdt_create(fdt_orig, FDT_MAX_SIZE); + fdt_begin_node(fdt_orig, ""); + fdt_end_node(fdt_orig); + fdt_finish(fdt_orig); + + fdt = g_malloc0(FDT_MAX_SIZE); + fdt_open_into(fdt_orig, fdt, FDT_MAX_SIZE); + sprintf(nodename, "pci@%d", slot); + offset = fdt_add_subnode(fdt, 0, nodename); + ret = spapr_populate_pci_child_dt(dev, fdt, offset, phb->index); + g_assert(!ret); + g_free(fdt_orig); + + /* hold on to node, configure_connector will pass it to the guest later */ + ccs = &drc_entry_slot->cc_state; + ccs->fdt = fdt; + ccs->offset_start = offset; + ccs->state = CC_STATE_PENDING; + ccs->dev = dev; + + return 0; +} + +/* check whether guest has released/isolated device */ +static bool spapr_drc_state_is_releasable(DrcEntry *drc_entry) +{ + return !DECODE_DRC_STATE(drc_entry->state, + INDICATOR_ISOLATION_MASK, + INDICATOR_ISOLATION_SHIFT); +} + +/* finalize device unplug/deletion */ +static void spapr_drc_state_reset(DrcEntry *drc_entry) +{ + ConfigureConnectorState *ccs = &drc_entry->cc_state; + uint32_t sense_empty = ENCODE_DRC_STATE(INDICATOR_ENTITY_SENSE_EMPTY, + INDICATOR_ENTITY_SENSE_MASK, + INDICATOR_ENTITY_SENSE_SHIFT); + + g_free(ccs->fdt); + ccs->fdt = NULL; + object_unparent(OBJECT(ccs->dev)); + ccs->dev = NULL; + ccs->state = CC_STATE_IDLE; + drc_entry->state &= ~INDICATOR_ENTITY_SENSE_MASK; + drc_entry->state |= sense_empty; + drc_entry->awaiting_release = false; +} + +static void spapr_device_hotplug_remove(DeviceState *qdev, PCIDevice *dev) +{ + sPAPRPHBState *phb = SPAPR_PCI_HOST_BRIDGE(qdev); + DrcEntry *drc_entry, *drc_entry_slot; + ConfigureConnectorState *ccs; + int slot = PCI_SLOT(dev->devfn); + + drc_entry = spapr_phb_to_drc_entry(phb->buid); + g_assert(drc_entry); + drc_entry_slot = &drc_entry->child_entries[slot]; + ccs = &drc_entry_slot->cc_state; + /* shouldn't be removing devices we haven't created an fdt for */ + g_assert(ccs->state != CC_STATE_IDLE); + /* if the device has already been released/isolated by guest, go ahead + * and remove it now. Otherwise, flag it as pending guest release so it + * can be removed later + */ + if (spapr_drc_state_is_releasable(drc_entry_slot)) { + spapr_drc_state_reset(drc_entry_slot); + } else { + if (drc_entry_slot->awaiting_release) { + fprintf(stderr, "waiting for guest to release the device"); + } else { + drc_entry_slot->awaiting_release = true; + } + } +} + +static int spapr_device_hotplug(DeviceState *qdev, PCIDevice *dev, + PCIHotplugState state) +{ + if (state == PCI_COLDPLUG_ENABLED) { + return 0; + } + + if (state == PCI_HOTPLUG_ENABLED) { + spapr_device_hotplug_add(qdev, dev); + } else { + spapr_device_hotplug_remove(qdev, dev); + } + + return 0; +} + static int spapr_phb_init(SysBusDevice *s) { DeviceState *dev = DEVICE(s); @@ -889,6 +1255,7 @@ static int spapr_phb_init(SysBusDevice *s) &sphb->memspace, &sphb->iospace, PCI_DEVFN(0, 0), PCI_NUM_PINS, TYPE_PCI_BUS); phb->bus = bus; + pci_bus_hotplug(phb->bus, spapr_device_hotplug, DEVICE(sphb)); sphb->dma_window_start = 0; sphb->dma_window_size = 0x40000000; @@ -1181,14 +1548,6 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, return bus_off; } -#define _FDT(exp) \ - do { \ - int ret = (exp); \ - if (ret < 0) { \ - return ret; \ - } \ - } while (0) - /* Write PHB properties */ _FDT(fdt_setprop_string(fdt, bus_off, "device_type", "pci")); _FDT(fdt_setprop_string(fdt, bus_off, "compatible", "IBM,Logical_PHB")); diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 7c8a521..1c9b725 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -328,6 +328,7 @@ struct DrcEntry { void *fdt; int fdt_offset; uint32_t state; + bool awaiting_release; ConfigureConnectorState cc_state; DrcEntry *child_entries; };