From patchwork Tue Oct 4 14:51:20 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Anthony PERARD X-Patchwork-Id: 117647 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [140.186.70.17]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id 9EB7CB6F7D for ; Wed, 5 Oct 2011 02:11:12 +1100 (EST) Received: from localhost ([::1]:47778 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1RB6Ml-0003MI-Od for incoming@patchwork.ozlabs.org; Tue, 04 Oct 2011 10:53:07 -0400 Received: from eggs.gnu.org ([140.186.70.92]:41225) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1RB6Ld-0008Sb-0W for qemu-devel@nongnu.org; Tue, 04 Oct 2011 10:52:06 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1RB6LR-0003qc-VC for qemu-devel@nongnu.org; Tue, 04 Oct 2011 10:51:56 -0400 Received: from smtp.citrix.com ([66.165.176.89]:14140) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1RB6LQ-0003mp-Ew for qemu-devel@nongnu.org; Tue, 04 Oct 2011 10:51:45 -0400 X-IronPort-AV: E=Sophos;i="4.68,485,1312171200"; d="scan'208";a="17928385" Received: from ftlpmailmx02.citrite.net ([10.13.107.66]) by FTLPIPO01.CITRIX.COM with ESMTP/TLS/RC4-MD5; 04 Oct 2011 10:51:43 -0400 Received: from smtp01.ad.xensource.com (10.219.128.104) by smtprelay.citrix.com (10.13.107.66) with Microsoft SMTP Server id 8.3.137.0; Tue, 4 Oct 2011 10:51:43 -0400 Received: from perard.uk.xensource.com (dhcp-3-28.uk.xensource.com [10.80.3.28] (may be forged)) by smtp01.ad.xensource.com (8.13.1/8.13.1) with ESMTP id p94EpSsA017018; Tue, 4 Oct 2011 07:51:42 -0700 From: Anthony PERARD To: QEMU-devel Date: Tue, 4 Oct 2011 15:51:20 +0100 Message-ID: 
<1317739882-4809-10-git-send-email-anthony.perard@citrix.com> X-Mailer: git-send-email 1.7.2.5 In-Reply-To: <1317739882-4809-1-git-send-email-anthony.perard@citrix.com> References: <1317739882-4809-1-git-send-email-anthony.perard@citrix.com> MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. X-Received-From: 66.165.176.89 Cc: Anthony PERARD , Alex Williamson , Xen Devel , Stefano Stabellini Subject: [Qemu-devel] [PATCH RFC V1 09/11] Introduce Xen PCI Passthrough, PCI config space helpers (2/3) X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Signed-off-by: Anthony PERARD --- hw/xen_pci_passthrough_config_init.c | 2489 ++++++++++++++++++++++++++++++++++ 1 files changed, 2489 insertions(+), 0 deletions(-) create mode 100644 hw/xen_pci_passthrough_config_init.c diff --git a/hw/xen_pci_passthrough_config_init.c b/hw/xen_pci_passthrough_config_init.c new file mode 100644 index 0000000..4bf9c15 --- /dev/null +++ b/hw/xen_pci_passthrough_config_init.c @@ -0,0 +1,2489 @@ +#include "qemu-timer.h" +#include "xen_backend.h" +#include "xen_pci_passthrough.h" + +#define PT_MERGE_VALUE(value, data, val_mask) \ + (((value) & (val_mask)) | ((data) & ~(val_mask))) + +/* prototype */ + +static uint32_t pt_ptr_reg_init(XenPCIPassthroughState *s, XenPTRegInfo *reg, + uint32_t real_offset); +static int pt_init_pci_config(XenPCIPassthroughState *s); + + +/* helper */ + +/* A return value of 1 means the capability should NOT be exposed to guest. 
*/ +static int pt_hide_dev_cap(const HostPCIDevice *d, uint8_t grp_id) +{ + switch (grp_id) { + case PCI_CAP_ID_EXP: + /* The PCI Express Capability Structure of the VF of Intel 82599 10GbE + * Controller looks trivial, e.g., the PCI Express Capabilities + * Register is 0. We should not try to expose it to guest. + */ + if (d->vendor_id == PCI_VENDOR_ID_INTEL && + d->device_id == PCI_DEVICE_ID_INTEL_82599_VF) { + return 1; + } + break; + } + return 0; +} + +/* find the emulated register group whose range contains address */ +XenPTRegGroup *pt_find_reg_grp(XenPCIPassthroughState *s, uint32_t address) +{ + XenPTRegGroup *entry = NULL; + + /* walk the register group list of this device */ + QLIST_FOREACH(entry, &s->reg_grp_tbl, entries) { + /* match when base_offset <= address < base_offset + size */ + if ((entry->base_offset <= address) + && ((entry->base_offset + entry->size) > address)) { + return entry; + } + } + + /* group entry not found */ + return NULL; +} + +/* find the emulated register of reg_grp whose range contains address */ +XenPTReg *pt_find_reg(XenPTRegGroup *reg_grp, uint32_t address) +{ + XenPTReg *reg_entry = NULL; + XenPTRegInfo *reg = NULL; + uint32_t real_offset = 0; + + /* walk the register list of this group */ + QLIST_FOREACH(reg_entry, &reg_grp->reg_tbl_list, entries) { + reg = reg_entry->reg; + real_offset = reg_grp->base_offset + reg->offset; + /* match when real_offset <= address < real_offset + size */ + if ((real_offset <= address) + && ((real_offset + reg->size) > address)) { + return reg_entry; + } + } + + return NULL; +} + +/* parse BAR: classify it as upper half, unused, I/O or memory */ +static PTBarFlag pt_bar_reg_parse(XenPCIPassthroughState *s, XenPTRegInfo *reg) +{ + PCIDevice *d = &s->dev; + XenPTRegion *region = NULL; + PCIIORegion *r; + int index = 0; + + /* check 64bit BAR: previous BAR is 64bit memory and not itself an upper */ + index = pt_bar_offset_to_index(reg->offset); + if ((index > 0) && (index < PCI_ROM_SLOT) && + ((s->real_device->base_addr[index - 1] + & (PCI_BASE_ADDRESS_SPACE | PCI_BASE_ADDRESS_MEM_TYPE_MASK)) + == (PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64))) + { + region = &s->bases[index - 1]; + if (region->bar_flag != PT_BAR_FLAG_UPPER) { + return PT_BAR_FLAG_UPPER; + } + } 
+ + /* check unused BAR */ + r = &d->io_regions[index]; + if (r->size == 0) { + return PT_BAR_FLAG_UNUSED; + } + + /* for ExpROM BAR */ + if (index == PCI_ROM_SLOT) { + return PT_BAR_FLAG_MEM; + } + + /* check BAR I/O indicator */ + if (s->real_device->base_addr[index] & PCI_BASE_ADDRESS_SPACE_IO) { + return PT_BAR_FLAG_IO; + } else { + return PT_BAR_FLAG_MEM; + } +} + + +/**************** + * general register functions + */ + +/* register initialization function */ + +static uint32_t pt_common_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset) +{ + return reg->init_val; +} + +/* Read register functions */ + +static int pt_byte_reg_read(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint8_t *value, uint8_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + uint8_t valid_emu_mask = 0; + + /* emulate byte register */ + valid_emu_mask = reg->emu_mask & valid_mask; + *value = PT_MERGE_VALUE(*value, cfg_entry->data, ~valid_emu_mask); + + return 0; +} +static int pt_word_reg_read(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint16_t *value, uint16_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + uint16_t valid_emu_mask = 0; + + /* emulate word register */ + valid_emu_mask = reg->emu_mask & valid_mask; + *value = PT_MERGE_VALUE(*value, cfg_entry->data, ~valid_emu_mask); + + return 0; +} +static int pt_long_reg_read(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint32_t *value, uint32_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + uint32_t valid_emu_mask = 0; + + /* emulate long register */ + valid_emu_mask = reg->emu_mask & valid_mask; + *value = PT_MERGE_VALUE(*value, cfg_entry->data, ~valid_emu_mask); + + return 0; +} + +/* Write register functions */ + +static int pt_byte_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint8_t *value, uint8_t dev_value, + uint8_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + uint8_t writable_mask = 0; + uint8_t throughable_mask = 0; + + /* modify emulate 
register */ + writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; + cfg_entry->data = PT_MERGE_VALUE(*value, cfg_entry->data, writable_mask); + + /* create value for writing to I/O device register */ + throughable_mask = ~reg->emu_mask & valid_mask; + *value = PT_MERGE_VALUE(*value, dev_value, throughable_mask); + + return 0; +} +static int pt_word_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint16_t *value, uint16_t dev_value, + uint16_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + uint16_t writable_mask = 0; + uint16_t throughable_mask = 0; + + /* modify emulated register: only emulated, non read-only, valid bits */ + writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; + cfg_entry->data = PT_MERGE_VALUE(*value, cfg_entry->data, writable_mask); + + /* device value: guest bits where not emulated, dev_value bits elsewhere */ + throughable_mask = ~reg->emu_mask & valid_mask; + *value = PT_MERGE_VALUE(*value, dev_value, throughable_mask); + + return 0; +} +static int pt_long_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint32_t *value, uint32_t dev_value, + uint32_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + uint32_t writable_mask = 0; + uint32_t throughable_mask = 0; + + /* modify emulated register: only emulated, non read-only, valid bits */ + writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; + cfg_entry->data = PT_MERGE_VALUE(*value, cfg_entry->data, writable_mask); + + /* device value: guest bits where not emulated, dev_value bits elsewhere */ + throughable_mask = ~reg->emu_mask & valid_mask; + *value = PT_MERGE_VALUE(*value, dev_value, throughable_mask); + + return 0; +} + +/* common restore register functions */ +static int pt_byte_reg_restore(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint32_t real_offset, uint8_t dev_value, + uint8_t *value) +{ + XenPTRegInfo *reg = cfg_entry->reg; + PCIDevice *d = &s->dev; + + /* start from the value held in the emulated config space (d->config) */ + *value = pci_get_byte(d->config + real_offset); + + /* create value for restoring to I/O device register */ + *value = 
PT_MERGE_VALUE(*value, dev_value, reg->emu_mask); + + return 0; +} +static int pt_word_reg_restore(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint32_t real_offset, uint16_t dev_value, + uint16_t *value) +{ + XenPTRegInfo *reg = cfg_entry->reg; + PCIDevice *d = &s->dev; + + /* use I/O device register's value as restore value */ + *value = pci_get_word(d->config + real_offset); + + /* create value for restoring to I/O device register */ + *value = PT_MERGE_VALUE(*value, dev_value, reg->emu_mask); + + return 0; +} + + +/* XenPTRegInfo declaration + * - only for emulated register (either a part or whole bit). + * - for passthrough register that need special behavior (like interacting with + * other component), set emu_mask to all 0 and specify r/w func properly. + * - do NOT use ALL F for init_val, otherwise the tbl will not be registered. + */ + +/******************** + * Header Type0 + */ + +static uint32_t pt_vendor_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset) +{ + return s->real_device->vendor_id; +} +static uint32_t pt_device_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset) +{ + return s->real_device->device_id; +} +static uint32_t pt_status_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset) +{ + XenPTRegGroup *reg_grp_entry = NULL; + XenPTReg *reg_entry = NULL; + int reg_field = 0; + + /* find Header register group */ + reg_grp_entry = pt_find_reg_grp(s, PCI_CAPABILITY_LIST); + if (reg_grp_entry) { + /* find Capabilities Pointer register */ + reg_entry = pt_find_reg(reg_grp_entry, PCI_CAPABILITY_LIST); + if (reg_entry) { + /* check Capabilities Pointer register */ + if (reg_entry->data) { + reg_field |= PCI_STATUS_CAP_LIST; + } else { + reg_field &= ~PCI_STATUS_CAP_LIST; + } + } else { + hw_error("Internal error: Couldn't find pt_reg_tbl for " + "Capabilities Pointer register. 
I/O emulator exit.\n"); + } + } else { + hw_error("Internal error: Couldn't find pt_reg_grp_tbl for Header. " + "I/O emulator exit.\n"); + } + + return reg_field; +} +static uint32_t pt_header_type_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, + uint32_t real_offset) +{ + /* read PCI_HEADER_TYPE */ + return reg->init_val | 0x80; +} + +/* initialize Interrupt Pin register */ +static uint32_t pt_irqpin_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset) +{ + return pci_read_intx(s); +} + +/* Command register */ +static int pt_cmd_reg_read(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint16_t *value, uint16_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + uint16_t valid_emu_mask = 0; + uint16_t emu_mask = reg->emu_mask; + + if (s->is_virtfn) { + emu_mask |= PCI_COMMAND_MEMORY; + } + + /* emulate word register */ + valid_emu_mask = emu_mask & valid_mask; + *value = PT_MERGE_VALUE(*value, cfg_entry->data, ~valid_emu_mask); + + return 0; +} +static int pt_cmd_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint16_t *value, uint16_t dev_value, + uint16_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + uint16_t writable_mask = 0; + uint16_t throughable_mask = 0; + uint16_t wr_value = *value; + uint16_t emu_mask = reg->emu_mask; + + if (s->is_virtfn) { + emu_mask |= PCI_COMMAND_MEMORY; + } + + /* modify emulate register */ + writable_mask = ~reg->ro_mask & valid_mask; + cfg_entry->data = PT_MERGE_VALUE(*value, cfg_entry->data, writable_mask); + + /* create value for writing to I/O device register */ + throughable_mask = ~emu_mask & valid_mask; + + if (*value & PCI_COMMAND_INTX_DISABLE) { + if (s->msi_trans_en) { + msi_set_enable(s, 0); + } else { + throughable_mask |= PCI_COMMAND_INTX_DISABLE; + } + } else { + if (s->msi_trans_en) { + msi_set_enable(s, 1); + } else { + if (s->machine_irq) { + throughable_mask |= PCI_COMMAND_INTX_DISABLE; + } + } + } + + *value = PT_MERGE_VALUE(*value, dev_value, 
throughable_mask); + + /* mapping BAR */ + pt_bar_mapping(s, wr_value & PCI_COMMAND_IO, + wr_value & PCI_COMMAND_MEMORY); + + return 0; +} +static int pt_cmd_reg_restore(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint32_t real_offset, uint16_t dev_value, + uint16_t *value) +{ + XenPTRegInfo *reg = cfg_entry->reg; + PCIDevice *d = &s->dev; + uint16_t restorable_mask = 0; + + /* use I/O device register's value as restore value */ + *value = pci_get_word(d->config + real_offset); + + /* create value for restoring to I/O device register + * but do not include Fast Back-to-Back Enable bit. + */ + restorable_mask = reg->emu_mask & ~PCI_COMMAND_FAST_BACK; + *value = PT_MERGE_VALUE(*value, dev_value, restorable_mask); + + if (!s->machine_irq) { + *value |= PCI_COMMAND_INTX_DISABLE; + } else { + *value &= ~PCI_COMMAND_INTX_DISABLE; + } + + return 0; +} + +/* BAR */ +static uint32_t pt_bar_reg_init(XenPCIPassthroughState *s, XenPTRegInfo *reg, + uint32_t real_offset) +{ + int reg_field = 0; + int index; + + /* get BAR index */ + index = pt_bar_offset_to_index(reg->offset); + if (index < 0) { + hw_error("Internal error: Invalid BAR index[%d]. " + "I/O emulator exit.\n", index); + } + + /* set initial guest physical base address to -1 */ + s->bases[index].e_physbase = -1; + + /* set BAR flag */ + s->bases[index].bar_flag = pt_bar_reg_parse(s, reg); + if (s->bases[index].bar_flag == PT_BAR_FLAG_UNUSED) { + reg_field = PT_INVALID_REG; + } + + return reg_field; +} +static int pt_bar_reg_read(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint32_t *value, uint32_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + uint32_t valid_emu_mask = 0; + uint32_t bar_emu_mask = 0; + int index; + + /* get BAR index */ + index = pt_bar_offset_to_index(reg->offset); + if (index < 0) { + hw_error("Internal error: Invalid BAR index[%d]. 
" + "I/O emulator exit.\n", index); + } + + /* use fixed-up value from kernel sysfs */ + *value = s->real_device->base_addr[index]; + + /* set emulate mask depend on BAR flag */ + switch (s->bases[index].bar_flag) + { + case PT_BAR_FLAG_MEM: + bar_emu_mask = PT_BAR_MEM_EMU_MASK; + break; + case PT_BAR_FLAG_IO: + bar_emu_mask = PT_BAR_IO_EMU_MASK; + break; + case PT_BAR_FLAG_UPPER: + bar_emu_mask = PT_BAR_ALLF; + break; + default: + break; + } + + /* emulate BAR */ + valid_emu_mask = bar_emu_mask & valid_mask; + *value = PT_MERGE_VALUE(*value, cfg_entry->data, ~valid_emu_mask); + + return 0; +} +static int pt_bar_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint32_t *value, uint32_t dev_value, + uint32_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + XenPTRegGroup *reg_grp_entry = NULL; + XenPTReg *reg_entry = NULL; + XenPTRegion *base = NULL; + PCIDevice *d = &s->dev; + PCIIORegion *r; + uint32_t writable_mask = 0; + uint32_t throughable_mask = 0; + uint32_t bar_emu_mask = 0; + uint32_t bar_ro_mask = 0; + uint32_t new_addr, last_addr; + uint32_t prev_offset; + uint32_t r_size = 0; + int index = 0; + + /* get BAR index */ + index = pt_bar_offset_to_index(reg->offset); + if (index < 0) { + hw_error("Internal error: Invalid BAR index[%d]. 
" + "I/O emulator exit.\n", index); + } + + r = &d->io_regions[index]; + base = &s->bases[index]; + r_size = pt_get_emul_size(base->bar_flag, r->size); + + /* set emulate mask and read-only mask depend on BAR flag */ + switch (s->bases[index].bar_flag) + { + case PT_BAR_FLAG_MEM: + bar_emu_mask = PT_BAR_MEM_EMU_MASK; + bar_ro_mask = PT_BAR_MEM_RO_MASK | (r_size - 1); + break; + case PT_BAR_FLAG_IO: + bar_emu_mask = PT_BAR_IO_EMU_MASK; + bar_ro_mask = PT_BAR_IO_RO_MASK | (r_size - 1); + break; + case PT_BAR_FLAG_UPPER: + bar_emu_mask = PT_BAR_ALLF; + bar_ro_mask = 0; /* all upper 32bit are R/W */ + break; + default: + break; + } + + /* modify emulate register */ + writable_mask = bar_emu_mask & ~bar_ro_mask & valid_mask; + cfg_entry->data = PT_MERGE_VALUE(*value, cfg_entry->data, writable_mask); + + /* check whether we need to update the virtual region address or not */ + switch (s->bases[index].bar_flag) + { + case PT_BAR_FLAG_MEM: + /* nothing to do */ + break; + case PT_BAR_FLAG_IO: + new_addr = cfg_entry->data; + last_addr = new_addr + r_size - 1; + /* check invalid address */ + if (last_addr <= new_addr || !new_addr || last_addr >= 0x10000) { + /* check 64K range */ + if ((last_addr >= 0x10000) && + (cfg_entry->data != (PT_BAR_ALLF & ~bar_ro_mask))) { + PT_LOG("Warning: Guest attempt to set Base Address " + "over the 64KB. [%02x:%02x.%x][Offset:%02xh]" + "[Address:%08xh][Size:%08xh]\n", + pci_bus_num(d->bus), PCI_SLOT(d->devfn), + PCI_FUNC(d->devfn), + reg->offset, new_addr, r_size); + } + /* just remove mapping */ + r->addr = -1; + goto exit; + } + break; + case PT_BAR_FLAG_UPPER: + if (cfg_entry->data) { + if (cfg_entry->data != (PT_BAR_ALLF & ~bar_ro_mask)) { + PT_LOG("Warning: Guest attempt to set high MMIO Base Address. " + "Ignore mapping. 
" + "[%02x:%02x.%x][Offset:%02xh][High Address:%08xh]\n", + pci_bus_num(d->bus), PCI_SLOT(d->devfn), + PCI_FUNC(d->devfn), reg->offset, cfg_entry->data); + } + /* clear lower address */ + d->io_regions[index-1].addr = -1; + } else { + /* find lower 32bit BAR */ + prev_offset = (reg->offset - 4); + reg_grp_entry = pt_find_reg_grp(s, prev_offset); + if (reg_grp_entry) { + reg_entry = pt_find_reg(reg_grp_entry, prev_offset); + if (reg_entry) { + /* restore lower address */ + d->io_regions[index-1].addr = reg_entry->data; + } else { + return -1; + } + } else { + return -1; + } + } + + /* never mapping the 'empty' upper region, + * because we'll do it enough for the lower region. + */ + r->addr = -1; + goto exit; + default: + break; + } + + /* update the corresponding virtual region address */ + /* + * When guest code tries to get block size of mmio, it will write all "1"s + * into pci bar register. In this case, cfg_entry->data == writable_mask. + * Especially for devices with large mmio, the value of writable_mask + * is likely to be a guest physical address that has been mapped to ram + * rather than mmio. Remapping this value to mmio should be prevented. 
+ */ + + if (cfg_entry->data != writable_mask) { + r->addr = cfg_entry->data; + } + +exit: + /* create value for writing to I/O device register */ + throughable_mask = ~bar_emu_mask & valid_mask; + *value = PT_MERGE_VALUE(*value, dev_value, throughable_mask); + + /* After BAR reg update, we need to remap BAR */ + reg_grp_entry = pt_find_reg_grp(s, PCI_COMMAND); + if (reg_grp_entry) { + reg_entry = pt_find_reg(reg_grp_entry, PCI_COMMAND); + if (reg_entry) { + pt_bar_mapping_one(s, index, reg_entry->data & PCI_COMMAND_IO, + reg_entry->data & PCI_COMMAND_MEMORY); + } + } + + return 0; +} +static int pt_bar_reg_restore(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint32_t real_offset, uint32_t dev_value, + uint32_t *value) +{ + XenPTRegInfo *reg = cfg_entry->reg; + uint32_t bar_emu_mask = 0; + int index = 0; + + /* get BAR index */ + index = pt_bar_offset_to_index(reg->offset); + if (index < 0) { + hw_error("Internal error: Invalid BAR index[%d]. " + "I/O emulator exit.\n", index); + } + + /* use value from kernel sysfs */ + if (s->bases[index].bar_flag == PT_BAR_FLAG_UPPER) { + *value = s->real_device->base_addr[index-1]; // >> 32; + } else { + *value = s->real_device->base_addr[index]; + } + + /* set emulate mask depend on BAR flag */ + switch (s->bases[index].bar_flag) + { + case PT_BAR_FLAG_MEM: + bar_emu_mask = PT_BAR_MEM_EMU_MASK; + break; + case PT_BAR_FLAG_IO: + bar_emu_mask = PT_BAR_IO_EMU_MASK; + break; + case PT_BAR_FLAG_UPPER: + bar_emu_mask = PT_BAR_ALLF; + break; + default: + break; + } + + /* create value for restoring to I/O device register */ + *value = PT_MERGE_VALUE(*value, dev_value, bar_emu_mask); + + return 0; +} + +/* write Exp ROM BAR */ +static int pt_exp_rom_bar_reg_write(XenPCIPassthroughState *s, + XenPTReg *cfg_entry, uint32_t *value, + uint32_t dev_value, uint32_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + XenPTRegGroup *reg_grp_entry = NULL; + XenPTReg *reg_entry = NULL; + XenPTRegion *base = NULL; + PCIDevice *d = 
(PCIDevice *)&s->dev; + PCIIORegion *r; + uint32_t writable_mask = 0; + uint32_t throughable_mask = 0; + pcibus_t r_size = 0; + uint32_t bar_emu_mask = 0; + uint32_t bar_ro_mask = 0; + + r = &d->io_regions[PCI_ROM_SLOT]; + r_size = r->size; + base = &s->bases[PCI_ROM_SLOT]; + /* align memory type resource size and keep the aligned result */ + r_size = pt_get_emul_size(base->bar_flag, r_size); + + /* set emulate mask and read-only mask (size bits RO, enable bit R/W) */ + bar_emu_mask = reg->emu_mask; + bar_ro_mask = (reg->ro_mask | (r_size - 1)) & ~PCI_ROM_ADDRESS_ENABLE; + + /* modify emulate register */ + writable_mask = ~bar_ro_mask & valid_mask; + cfg_entry->data = PT_MERGE_VALUE(*value, cfg_entry->data, writable_mask); + + /* update the corresponding virtual region address */ + /* + * When guest code tries to get block size of mmio, it will write all "1"s + * into pci bar register. In this case, cfg_entry->data == writable_mask. + * Especially for devices with large mmio, the value of writable_mask + * is likely to be a guest physical address that has been mapped to ram + * rather than mmio. Remapping this value to mmio should be prevented. 
+ */ + + if (cfg_entry->data != writable_mask) { + r->addr = cfg_entry->data; + } + + /* create value for writing to I/O device register */ + throughable_mask = ~bar_emu_mask & valid_mask; + *value = PT_MERGE_VALUE(*value, dev_value, throughable_mask); + + /* After BAR reg update, we need to remap BAR */ + reg_grp_entry = pt_find_reg_grp(s, PCI_COMMAND); + if (reg_grp_entry) { + reg_entry = pt_find_reg(reg_grp_entry, PCI_COMMAND); + if (reg_entry) { + pt_bar_mapping_one(s, PCI_ROM_SLOT, + reg_entry->data & PCI_COMMAND_IO, + reg_entry->data & PCI_COMMAND_MEMORY); + } + } + + return 0; +} +/* restore ROM BAR */ +static int pt_exp_rom_bar_reg_restore(XenPCIPassthroughState *s, + XenPTReg *cfg_entry, + uint32_t real_offset, + uint32_t dev_value, uint32_t *value) +{ + XenPTRegInfo *reg = cfg_entry->reg; + + /* use value from kernel sysfs */ + *value = + PT_MERGE_VALUE(host_pci_read_long(s->real_device, PCI_ROM_ADDRESS), + dev_value, reg->emu_mask); + return 0; +} + +/* Header Type0 reg static information table */ +static XenPTRegInfo pt_emu_reg_header0_tbl[] = { + /* Vendor ID reg */ + { + .offset = PCI_VENDOR_ID, + .size = 2, + .init_val = 0x0000, + .ro_mask = 0xFFFF, + .emu_mask = 0xFFFF, + .init = pt_vendor_reg_init, + .u.w.read = pt_word_reg_read, + .u.w.write = pt_word_reg_write, + .u.w.restore = NULL, + }, + /* Device ID reg */ + { + .offset = PCI_DEVICE_ID, + .size = 2, + .init_val = 0x0000, + .ro_mask = 0xFFFF, + .emu_mask = 0xFFFF, + .init = pt_device_reg_init, + .u.w.read = pt_word_reg_read, + .u.w.write = pt_word_reg_write, + .u.w.restore = NULL, + }, + /* Command reg */ + { + .offset = PCI_COMMAND, + .size = 2, + .init_val = 0x0000, + .ro_mask = 0xF880, + .emu_mask = 0x0740, + .init = pt_common_reg_init, + .u.w.read = pt_cmd_reg_read, + .u.w.write = pt_cmd_reg_write, + .u.w.restore = pt_cmd_reg_restore, + }, + /* Capabilities Pointer reg */ + { + .offset = PCI_CAPABILITY_LIST, + .size = 1, + .init_val = 0x00, + .ro_mask = 0xFF, + .emu_mask = 0xFF, + .init = 
pt_ptr_reg_init, + .u.b.read = pt_byte_reg_read, + .u.b.write = pt_byte_reg_write, + .u.b.restore = NULL, + }, + /* Status reg */ + /* use emulated Cap Ptr value to initialize, + * so need to be declared after Cap Ptr reg + */ + { + .offset = PCI_STATUS, + .size = 2, + .init_val = 0x0000, + .ro_mask = 0x06FF, + .emu_mask = 0x0010, + .init = pt_status_reg_init, + .u.w.read = pt_word_reg_read, + .u.w.write = pt_word_reg_write, + .u.w.restore = NULL, + }, + /* Cache Line Size reg */ + { + .offset = PCI_CACHE_LINE_SIZE, + .size = 1, + .init_val = 0x00, + .ro_mask = 0x00, + .emu_mask = 0xFF, + .init = pt_common_reg_init, + .u.b.read = pt_byte_reg_read, + .u.b.write = pt_byte_reg_write, + .u.b.restore = pt_byte_reg_restore, + }, + /* Latency Timer reg */ + { + .offset = PCI_LATENCY_TIMER, + .size = 1, + .init_val = 0x00, + .ro_mask = 0x00, + .emu_mask = 0xFF, + .init = pt_common_reg_init, + .u.b.read = pt_byte_reg_read, + .u.b.write = pt_byte_reg_write, + .u.b.restore = pt_byte_reg_restore, + }, + /* Header Type reg */ + { + .offset = PCI_HEADER_TYPE, + .size = 1, + .init_val = 0x00, + .ro_mask = 0xFF, + .emu_mask = 0x00, + .init = pt_header_type_reg_init, + .u.b.read = pt_byte_reg_read, + .u.b.write = pt_byte_reg_write, + .u.b.restore = NULL, + }, + /* Interrupt Line reg */ + { + .offset = PCI_INTERRUPT_LINE, + .size = 1, + .init_val = 0x00, + .ro_mask = 0x00, + .emu_mask = 0xFF, + .init = pt_common_reg_init, + .u.b.read = pt_byte_reg_read, + .u.b.write = pt_byte_reg_write, + .u.b.restore = NULL, + }, + /* Interrupt Pin reg */ + { + .offset = PCI_INTERRUPT_PIN, + .size = 1, + .init_val = 0x00, + .ro_mask = 0xFF, + .emu_mask = 0xFF, + .init = pt_irqpin_reg_init, + .u.b.read = pt_byte_reg_read, + .u.b.write = pt_byte_reg_write, + .u.b.restore = NULL, + }, + /* BAR 0 reg */ + /* mask of BAR need to be decided later, depends on IO/MEM type */ + { + .offset = PCI_BASE_ADDRESS_0, + .size = 4, + .init_val = 0x00000000, + .init = pt_bar_reg_init, + .u.dw.read = pt_bar_reg_read, 
+ .u.dw.write = pt_bar_reg_write, + .u.dw.restore = pt_bar_reg_restore, + }, + /* BAR 1 reg */ + { + .offset = PCI_BASE_ADDRESS_1, + .size = 4, + .init_val = 0x00000000, + .init = pt_bar_reg_init, + .u.dw.read = pt_bar_reg_read, + .u.dw.write = pt_bar_reg_write, + .u.dw.restore = pt_bar_reg_restore, + }, + /* BAR 2 reg */ + { + .offset = PCI_BASE_ADDRESS_2, + .size = 4, + .init_val = 0x00000000, + .init = pt_bar_reg_init, + .u.dw.read = pt_bar_reg_read, + .u.dw.write = pt_bar_reg_write, + .u.dw.restore = pt_bar_reg_restore, + }, + /* BAR 3 reg */ + { + .offset = PCI_BASE_ADDRESS_3, + .size = 4, + .init_val = 0x00000000, + .init = pt_bar_reg_init, + .u.dw.read = pt_bar_reg_read, + .u.dw.write = pt_bar_reg_write, + .u.dw.restore = pt_bar_reg_restore, + }, + /* BAR 4 reg */ + { + .offset = PCI_BASE_ADDRESS_4, + .size = 4, + .init_val = 0x00000000, + .init = pt_bar_reg_init, + .u.dw.read = pt_bar_reg_read, + .u.dw.write = pt_bar_reg_write, + .u.dw.restore = pt_bar_reg_restore, + }, + /* BAR 5 reg */ + { + .offset = PCI_BASE_ADDRESS_5, + .size = 4, + .init_val = 0x00000000, + .init = pt_bar_reg_init, + .u.dw.read = pt_bar_reg_read, + .u.dw.write = pt_bar_reg_write, + .u.dw.restore = pt_bar_reg_restore, + }, + /* Expansion ROM BAR reg */ + { + .offset = PCI_ROM_ADDRESS, + .size = 4, + .init_val = 0x00000000, + .ro_mask = 0x000007FE, + .emu_mask = 0xFFFFF800, + .init = pt_bar_reg_init, + .u.dw.read = pt_long_reg_read, + .u.dw.write = pt_exp_rom_bar_reg_write, + .u.dw.restore = pt_exp_rom_bar_reg_restore, + }, + { + .size = 0, + }, +}; + + +/********************************* + * Vital Product Data Capability + */ + +/* Vital Product Data Capability Structure reg static information table */ +static XenPTRegInfo pt_emu_reg_vpd_tbl[] = { + { + .offset = PCI_CAP_LIST_NEXT, + .size = 1, + .init_val = 0x00, + .ro_mask = 0xFF, + .emu_mask = 0xFF, + .init = pt_ptr_reg_init, + .u.b.read = pt_byte_reg_read, + .u.b.write = pt_byte_reg_write, + .u.b.restore = NULL, + }, + { + .size = 
0, + }, +}; + + +/************************************** + * Vendor Specific Capability + */ + +/* Vendor Specific Capability Structure reg static information table */ +static XenPTRegInfo pt_emu_reg_vendor_tbl[] = { + { + .offset = PCI_CAP_LIST_NEXT, + .size = 1, + .init_val = 0x00, + .ro_mask = 0xFF, + .emu_mask = 0xFF, + .init = pt_ptr_reg_init, + .u.b.read = pt_byte_reg_read, + .u.b.write = pt_byte_reg_write, + .u.b.restore = NULL, + }, + { + .size = 0, + }, +}; + + +/***************************** + * PCI Express Capability + */ + +/* initialize Link Control register */ +static uint32_t pt_linkctrl_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset) +{ + uint8_t cap_ver = 0; + uint8_t dev_type = 0; + + /* TODO maybe better to use function from hw/pcie.c */ + cap_ver = pci_get_byte(s->dev.config + real_offset - reg->offset + + PCI_EXP_FLAGS) + & PCI_EXP_FLAGS_VERS; + dev_type = (pci_get_byte(s->dev.config + real_offset - reg->offset + + PCI_EXP_FLAGS) + & PCI_EXP_FLAGS_TYPE) >> 4; + + /* no need to initialize in case of Root Complex Integrated Endpoint + * with cap_ver 1.x + */ + if ((dev_type == PCI_EXP_TYPE_RC_END) && (cap_ver == 1)) { + return PT_INVALID_REG; + } + + return reg->init_val; +} +/* initialize Device Control 2 register */ +static uint32_t pt_devctrl2_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset) +{ + uint8_t cap_ver = 0; + + cap_ver = pci_get_byte(s->dev.config + real_offset - reg->offset + + PCI_EXP_FLAGS) + & PCI_EXP_FLAGS_VERS; + + /* no need to initialize in case of cap_ver 1.x */ + if (cap_ver == 1) { + return PT_INVALID_REG; + } + + return reg->init_val; +} +/* initialize Link Control 2 register */ +static uint32_t pt_linkctrl2_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset) +{ + int reg_field = 0; + uint8_t cap_ver = 0; + + cap_ver = pci_get_byte(s->dev.config + real_offset - reg->offset + + PCI_EXP_FLAGS) + & PCI_EXP_FLAGS_VERS; + + /* no need to 
initialize in case of cap_ver 1.x */ + if (cap_ver == 1) { + return PT_INVALID_REG; + } + + /* set Supported Link Speed */ + reg_field |= PCI_EXP_LNKCAP_SLS & + pci_get_byte(s->dev.config + real_offset - reg->offset + + PCI_EXP_LNKCAP); + + return reg_field; +} + +/* PCI Express Capability Structure reg static information table */ +static XenPTRegInfo pt_emu_reg_pcie_tbl[] = { + /* Next Pointer reg */ + { + .offset = PCI_CAP_LIST_NEXT, + .size = 1, + .init_val = 0x00, + .ro_mask = 0xFF, + .emu_mask = 0xFF, + .init = pt_ptr_reg_init, + .u.b.read = pt_byte_reg_read, + .u.b.write = pt_byte_reg_write, + .u.b.restore = NULL, + }, + /* Device Capabilities reg */ + { + .offset = PCI_EXP_DEVCAP, + .size = 4, + .init_val = 0x00000000, + .ro_mask = 0x1FFCFFFF, + .emu_mask = 0x10000000, + .init = pt_common_reg_init, + .u.dw.read = pt_long_reg_read, + .u.dw.write = pt_long_reg_write, + .u.dw.restore = NULL, + }, + /* Device Control reg */ + { + .offset = PCI_EXP_DEVCTL, + .size = 2, + .init_val = 0x2810, + .ro_mask = 0x8400, + .emu_mask = 0xFFFF, + .init = pt_common_reg_init, + .u.w.read = pt_word_reg_read, + .u.w.write = pt_word_reg_write, + .u.w.restore = pt_word_reg_restore, + }, + /* Link Control reg */ + { + .offset = PCI_EXP_LNKCTL, + .size = 2, + .init_val = 0x0000, + .ro_mask = 0xFC34, + .emu_mask = 0xFFFF, + .init = pt_linkctrl_reg_init, + .u.w.read = pt_word_reg_read, + .u.w.write = pt_word_reg_write, + .u.w.restore = pt_word_reg_restore, + }, + /* Device Control 2 reg */ + { + .offset = 0x28, + .size = 2, + .init_val = 0x0000, + .ro_mask = 0xFFE0, + .emu_mask = 0xFFFF, + .init = pt_devctrl2_reg_init, + .u.w.read = pt_word_reg_read, + .u.w.write = pt_word_reg_write, + .u.w.restore = pt_word_reg_restore, + }, + /* Link Control 2 reg */ + { + .offset = 0x30, + .size = 2, + .init_val = 0x0000, + .ro_mask = 0xE040, + .emu_mask = 0xFFFF, + .init = pt_linkctrl2_reg_init, + .u.w.read = pt_word_reg_read, + .u.w.write = pt_word_reg_write, + .u.w.restore = pt_word_reg_restore, 
+    },
+    {
+        .size = 0,
+    },
+};
+
+
+/*********************************
+ * Power Management Capability
+ */
+
+/*
+ * Init hook for the Power Management Capabilities (PMC) register.
+ *
+ * When power management is enabled for the device, the PMC word held in the
+ * emulated config space at real_offset is cached in pm_state->pmc_field.
+ * The emulated register always starts out as reg->init_val.
+ */
+static uint32_t pt_pmc_reg_init(XenPCIPassthroughState *s,
+                                XenPTRegInfo *reg, uint32_t real_offset)
+{
+    if (s->power_mgmt) {
+        s->pm_state->pmc_field = pci_get_word(s->dev.config + real_offset);
+    }
+
+    return reg->init_val;
+}
+/*
+ * Init hook for the Power Management Control/Status (PMCSR) register.
+ *
+ * With power management enabled this records the No_Soft_Reset bit (only
+ * when the cached PM capability version is above 2) and, if the physical
+ * device is found in D1, D2 or D3hot, writes 0 to its PMCSR to bring it back
+ * to D0.  Leaving D3hot additionally re-runs pt_init_pci_config().
+ * Returns reg->init_val in every case.
+ */
+static uint32_t pt_pmcsr_reg_init(XenPCIPassthroughState *s,
+                                  XenPTRegInfo *reg, uint32_t real_offset)
+{
+    uint16_t pm_version;
+    uint32_t hw_state;
+
+    if (!s->power_mgmt) {
+        return reg->init_val;
+    }
+
+    pm_version = s->pm_state->pmc_field & PCI_PM_CAP_VER_MASK;
+    if (pm_version > 2) {
+        s->pm_state->no_soft_reset =
+            pci_get_byte(s->dev.config + real_offset)
+            & PCI_PM_CTRL_NO_SOFT_RESET;
+    }
+
+    /* wake up the real device; the waits match the state being left */
+    hw_state = host_pci_read_word(s->real_device, real_offset)
+               & PCI_PM_CTRL_STATE_MASK;
+    if (hw_state == 1) {
+        PT_LOG("Power state transition D1 -> D0active\n");
+        host_pci_write_word(s->real_device, real_offset, 0);
+    } else if (hw_state == 2) {
+        PT_LOG("Power state transition D2 -> D0active\n");
+        host_pci_write_word(s->real_device, real_offset, 0);
+        usleep(200);
+    } else if (hw_state == 3) {
+        PT_LOG("Power state transition D3hot -> D0active\n");
+        host_pci_write_word(s->real_device, real_offset, 0);
+        usleep(10 * 1000);
+        pt_init_pci_config(s);
+    }
+
+    return reg->init_val;
+}
+/* read Power Management Control/Status register */
+static int pt_pmcsr_reg_read(XenPCIPassthroughState *s, XenPTReg *cfg_entry,
+                             uint16_t *value, uint16_t valid_mask)
+{
+    XenPTRegInfo *reg = cfg_entry->reg;
+    uint16_t valid_emu_mask = reg->emu_mask;
+
+    if (!s->power_mgmt) {
+        valid_emu_mask |= PCI_PM_CTRL_STATE_MASK |
PCI_PM_CTRL_NO_SOFT_RESET;
+    }
+
+    valid_emu_mask = valid_emu_mask & valid_mask;
+    *value = PT_MERGE_VALUE(*value, cfg_entry->data, ~valid_emu_mask);
+
+    return 0;
+}
+/*
+ * Drop the interrupt binding and the I/O resource mappings of the device:
+ * unbind INTx (only when MSI-INTx translation is off and a machine IRQ is
+ * set), disable MSI / MSI-X if present, invalidate every virtual region
+ * address and finally ask pt_bar_mapping() to unmap the BARs.
+ */
+static void pt_reset_interrupt_and_io_mapping(XenPCIPassthroughState *s)
+{
+    uint8_t e_device = PCI_SLOT(s->dev.devfn);
+    uint8_t e_intx = pci_intx(s);
+    int bar;
+
+    /* a failed unbind is only logged */
+    if (s->msi_trans_en == 0 && s->machine_irq
+        && xc_domain_unbind_pt_irq(xen_xc, xen_domid, s->machine_irq,
+                                   PT_IRQ_TYPE_PCI, 0, e_device, e_intx, 0)) {
+        PT_LOG("Error: Unbinding of interrupt failed!\n");
+    }
+
+    if (s->msi) {
+        pt_msi_disable(s);
+    }
+    if (s->msix) {
+        pt_msix_disable(s);
+    }
+
+    for (bar = 0; bar < PCI_NUM_REGIONS; bar++) {
+        s->dev.io_regions[bar].addr = -1;
+    }
+
+    pt_bar_mapping(s, 0, 0);
+}
+/*
+ * Compare the power state reported by the physical PMCSR with the state the
+ * guest requested (pm_state->req_state).
+ * Returns 0 when they match, -1 (after logging) when they differ.
+ */
+static int check_power_state(XenPCIPassthroughState *s)
+{
+    XenPTPM *pm = s->pm_state;
+    uint16_t pmcsr = host_pci_read_word(s->real_device,
+                                        pm->pm_base + PCI_PM_CTRL);
+    uint16_t hw_state = pmcsr & PCI_PM_CTRL_STATE_MASK;
+
+    if (pm->req_state == hw_state) {
+        return 0;
+    }
+
+    PT_LOG("Error: Failed to change power state. "
+           "[%02x:%02x.%x][requested state:%d][current state:%d]\n",
+           pci_bus_num(s->dev.bus), PCI_SLOT(s->dev.devfn),
+           PCI_FUNC(s->dev.devfn), pm->req_state, hw_state);
+    return -1;
+}
+/*
+ * Timer callback for a D3hot -> D0 transition of a device that resets
+ * internally: if the device reached the requested state, re-initialise its
+ * config space; then clear the transition flag and release the timer.
+ */
+static void pt_from_d3hot_to_d0_with_reset(void *opaque)
+{
+    XenPCIPassthroughState *s = opaque;
+    XenPTPM *pm = s->pm_state;
+
+    if (check_power_state(s) >= 0) {
+        pt_init_pci_config(s);
+    }
+
+    pm->flags &= ~PT_FLAG_TRANSITING;
+
+    qemu_free_timer(pm->pm_timer);
+    pm->pm_timer = NULL;
+}
+/*
+ * Timer callback for the remaining delayed transitions: verify (and log)
+ * the resulting power state, clear the transition flag, release the timer.
+ */
+static void pt_default_power_transition(void *opaque)
+{
+    XenPCIPassthroughState *s = opaque;
+    XenPTPM *pm = s->pm_state;
+
+    check_power_state(s);
+
+    pm->flags &= ~PT_FLAG_TRANSITING;
+
+    qemu_free_timer(pm->pm_timer);
+    pm->pm_timer = NULL;
+}
+/* write Power Management Control/Status register */
+static int pt_pmcsr_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry,
+                              uint16_t *value, uint16_t dev_value,
+                              uint16_t valid_mask)
+{
+    XenPTRegInfo *reg = cfg_entry->reg;
+    PCIDevice *d = &s->dev;
+    uint16_t emu_mask = reg->emu_mask;
+    uint16_t writable_mask = 0;
+    uint16_t throughable_mask = 0;
+    struct pt_pm_info *pm_state = s->pm_state;
+
+    if (!s->power_mgmt) {
+        emu_mask |= PCI_PM_CTRL_STATE_MASK | PCI_PM_CTRL_NO_SOFT_RESET;
+    }
+
+    /* modify emulate register */
+    writable_mask = emu_mask & ~reg->ro_mask & valid_mask;
+    cfg_entry->data = PT_MERGE_VALUE(*value, cfg_entry->data, writable_mask);
+
+    /* create value for writing to I/O device register */
+    throughable_mask = ~emu_mask & valid_mask;
+    *value = PT_MERGE_VALUE(*value, dev_value, throughable_mask);
+
+    if (!s->power_mgmt) {
+        return 0;
+    }
+
+    /* set I/O device power state */
+    pm_state->cur_state = dev_value & PCI_PM_CTRL_STATE_MASK;
+
+    /* set Guest requested PowerState */
+
pm_state->req_state = *value & PCI_PM_CTRL_STATE_MASK; + + /* check power state transition or not */ + if (pm_state->cur_state == pm_state->req_state) { + /* not power state transition */ + return 0; + } + + /* check enable power state transition */ + if ((pm_state->req_state != 0) && + (pm_state->cur_state > pm_state->req_state)) { + PT_LOG("Error: Invalid power transition. " + "[%02x:%02x.%x][requested state:%d][current state:%d]\n", + pci_bus_num(d->bus), PCI_SLOT(d->devfn), PCI_FUNC(d->devfn), + pm_state->req_state, pm_state->cur_state); + + return 0; + } + + /* check if this device supports the requested power state */ + if (((pm_state->req_state == 1) && !(pm_state->pmc_field & PCI_PM_CAP_D1)) + || ((pm_state->req_state == 2) && + !(pm_state->pmc_field & PCI_PM_CAP_D2))) { + PT_LOG("Error: Invalid power transition. " + "[%02x:%02x.%x][requested state:%d][current state:%d]\n", + pci_bus_num(d->bus), PCI_SLOT(d->devfn), PCI_FUNC(d->devfn), + pm_state->req_state, pm_state->cur_state); + + return 0; + } + + /* in case of transition related to D3hot, it's necessary to wait 10 ms. + * But because writing to register will be performed later on actually, + * don't start QEMUTimer right now, just alloc and init QEMUTimer here. 
+ */ + if ((pm_state->cur_state == 3) || (pm_state->req_state == 3)) { + if (pm_state->req_state == 0) { + /* alloc and init QEMUTimer */ + if (!pm_state->no_soft_reset) { + pm_state->pm_timer = qemu_new_timer_ms(rt_clock, + pt_from_d3hot_to_d0_with_reset, s); + + /* reset Interrupt and I/O resource mapping */ + pt_reset_interrupt_and_io_mapping(s); + } else { + pm_state->pm_timer = qemu_new_timer_ms(rt_clock, + pt_default_power_transition, s); + } + } else { + /* alloc and init QEMUTimer */ + pm_state->pm_timer = qemu_new_timer_ms(rt_clock, + pt_default_power_transition, s); + } + + /* set power state transition delay */ + pm_state->pm_delay = 10; + + /* power state transition flags on */ + pm_state->flags |= PT_FLAG_TRANSITING; + } + /* in case of transition related to D0, D1 and D2, + * no need to use QEMUTimer. + * So, we perfom writing to register here and then read it back. + */ + else { + /* write power state to I/O device register */ + host_pci_write_word(s->real_device, pm_state->pm_base + PCI_PM_CTRL, + *value); + + /* in case of transition related to D2, + * it's necessary to wait 200 usec. + * But because QEMUTimer do not support microsec unit right now, + * so we do wait ourself here. 
+         */
+        if ((pm_state->cur_state == 2) || (pm_state->req_state == 2)) {
+            usleep(200);
+        }
+
+        /* check power state */
+        check_power_state(s);
+
+        /* recreate value for writing to I/O device register */
+        *value = host_pci_read_word(s->real_device,
+                                    pm_state->pm_base + PCI_PM_CTRL);
+    }
+
+    return 0;
+}
+
+/*
+ * Restore hook for the PMCSR register.
+ *
+ * Nothing is really restored: the value handed back is the current device
+ * value with PME Enable cleared and PME Status set (PME Status is RW1C, so
+ * writing 1b clears it).  Always returns 0.
+ */
+static int pt_pmcsr_reg_restore(XenPCIPassthroughState *s, XenPTReg *cfg_entry,
+                                uint32_t real_offset, uint16_t dev_value,
+                                uint16_t *value)
+{
+    uint16_t restored = dev_value & ~PCI_PM_CTRL_PME_ENABLE;
+
+    restored |= PCI_PM_CTRL_PME_STATUS;
+    *value = restored;
+
+    return 0;
+}
+
+
+/* Power Management Capability: static register information table */
+static XenPTRegInfo pt_emu_reg_pm_tbl[] = {
+    /* Next Pointer reg */
+    {
+        .offset     = PCI_CAP_LIST_NEXT,
+        .size       = 1,
+        .init_val   = 0x00,
+        .ro_mask    = 0xFF,
+        .emu_mask   = 0xFF,
+        .init       = pt_ptr_reg_init,
+        .u.b.read   = pt_byte_reg_read,
+        .u.b.write  = pt_byte_reg_write,
+        .u.b.restore  = NULL,
+    },
+    /* Power Management Capabilities reg */
+    {
+        .offset     = PCI_CAP_FLAGS,
+        .size       = 2,
+        .init_val   = 0x0000,
+        .ro_mask    = 0xFFFF,
+        .emu_mask   = 0xF9C8,
+        .init       = pt_pmc_reg_init,
+        .u.w.read   = pt_word_reg_read,
+        .u.w.write  = pt_word_reg_write,
+        .u.w.restore  = NULL,
+    },
+    /* PCI Power Management Control/Status reg */
+    {
+        .offset     = PCI_PM_CTRL,
+        .size       = 2,
+        .init_val   = 0x0008,
+        .ro_mask    = 0xE1FC,
+        .emu_mask   = 0x8100,
+        .init       = pt_pmcsr_reg_init,
+        .u.w.read   = pt_pmcsr_reg_read,
+        .u.w.write  = pt_pmcsr_reg_write,
+        .u.w.restore  = pt_pmcsr_reg_restore,
+    },
+    /* end-of-table marker */
+    {
+        .size = 0,
+    },
+};
+
+/********************************
+ * MSI Capability
+ */
+
+/* Message Control register */
+static uint32_t pt_msgctrl_reg_init(XenPCIPassthroughState *s,
+                                    XenPTRegInfo *reg, uint32_t real_offset)
+{
+    PCIDevice *d = &s->dev;
+
uint16_t reg_field = 0; + + /* use I/O device register's value as initial value */ + reg_field = pci_get_word(d->config + real_offset); + + if (reg_field & PCI_MSI_FLAGS_ENABLE) { + PT_LOG("MSI enabled already, disable first\n"); + host_pci_write_word(s->real_device, real_offset, + reg_field & ~PCI_MSI_FLAGS_ENABLE); + } + s->msi->flags |= reg_field | MSI_FLAG_UNINIT; + s->msi->ctrl_offset = real_offset; + + return reg->init_val; +} +static int pt_msgctrl_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint16_t *value, uint16_t dev_value, + uint16_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + uint16_t writable_mask = 0; + uint16_t throughable_mask = 0; + PCIDevice *pd = (PCIDevice *)s; + uint16_t val; + + /* Currently no support for multi-vector */ + if (*value & PCI_MSI_FLAGS_QSIZE) { + PT_LOG("Warning: try to set more than 1 vector ctrl %x\n", *value); + } + + /* modify emulate register */ + writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; + cfg_entry->data = PT_MERGE_VALUE(*value, cfg_entry->data, writable_mask); + /* update the msi_info too */ + s->msi->flags |= cfg_entry->data & + ~(MSI_FLAG_UNINIT | PT_MSI_MAPPED | PCI_MSI_FLAGS_ENABLE); + + /* create value for writing to I/O device register */ + val = *value; + throughable_mask = ~reg->emu_mask & valid_mask; + *value = PT_MERGE_VALUE(*value, dev_value, throughable_mask); + + /* update MSI */ + if (val & PCI_MSI_FLAGS_ENABLE) { + /* setup MSI pirq for the first time */ + if (s->msi->flags & MSI_FLAG_UNINIT) { + if (s->msi_trans_en) { + PT_LOG("guest enabling MSI, disable MSI-INTx translation\n"); + pt_disable_msi_translate(s); + } else { + /* Init physical one */ + PT_LOG("setup msi for dev %x\n", pd->devfn); + if (pt_msi_setup(s)) { + /* We do not broadcast the error to the framework code, so + * that MSI errors are contained in MSI emulation code and + * QEMU can go on running. + * Guest MSI would be actually not working. 
+                     */
+                    *value &= ~PCI_MSI_FLAGS_ENABLE;
+                    PT_LOG("Warning: Can not map MSI for dev %x\n", pd->devfn);
+                    return 0;
+                }
+            }
+            if (pt_msi_update(s)) {
+                *value &= ~PCI_MSI_FLAGS_ENABLE;
+                PT_LOG("Warning: Can not bind MSI for dev %x\n", pd->devfn);
+                return 0;
+            }
+            s->msi->flags &= ~MSI_FLAG_UNINIT;
+            s->msi->flags |= PT_MSI_MAPPED;
+        }
+        s->msi->flags |= PCI_MSI_FLAGS_ENABLE;
+    } else {
+        s->msi->flags &= ~PCI_MSI_FLAGS_ENABLE;
+    }
+
+    /* pass through MSI_ENABLE bit when no MSI-INTx translation */
+    if (!s->msi_trans_en) {
+        *value &= ~PCI_MSI_FLAGS_ENABLE;
+        *value |= val & PCI_MSI_FLAGS_ENABLE;
+    }
+
+    return 0;
+}
+
+/*
+ * Init hook for the Message Upper Address register: the register exists
+ * only when the cached MSI flags carry the 64-bit bit, otherwise
+ * PT_INVALID_REG is returned.
+ */
+static uint32_t pt_msgaddr64_reg_init(XenPCIPassthroughState *ptdev,
+                                      XenPTRegInfo *reg, uint32_t real_offset)
+{
+    if (ptdev->msi->flags & PCI_MSI_FLAGS_64BIT) {
+        return reg->init_val;
+    }
+
+    return PT_INVALID_REG;
+}
+/*
+ * Init hook for the Message Data register.  It is registered twice, once
+ * per possible offset; only the table entry whose offset matches the
+ * 32/64-bit layout of the device yields reg->init_val, the other one
+ * yields PT_INVALID_REG.
+ */
+static uint32_t pt_msgdata_reg_init(XenPCIPassthroughState *ptdev,
+                                    XenPTRegInfo *reg, uint32_t real_offset)
+{
+    uint32_t msi_flags = ptdev->msi->flags;
+    int layout_matches;
+
+    if (msi_flags & PCI_MSI_FLAGS_64BIT) {
+        layout_matches = (reg->offset == PCI_MSI_DATA_64);
+    } else {
+        layout_matches = (reg->offset == PCI_MSI_DATA_32);
+    }
+
+    return layout_matches ? reg->init_val : PT_INVALID_REG;
+}
+
+/*
+ * Write hook for the (lower) Message Address register.
+ *
+ * Merges the guest value into the emulated register, mirrors the result
+ * into msi->addr_lo, builds the pass-through value in *value and, when the
+ * address changed while the MSI is already mapped, calls pt_msi_update().
+ * Always returns 0.
+ */
+static int pt_msgaddr32_reg_write(XenPCIPassthroughState *s,
+                                  XenPTReg *cfg_entry, uint32_t *value,
+                                  uint32_t dev_value, uint32_t valid_mask)
+{
+    XenPTRegInfo *reg = cfg_entry->reg;
+    uint32_t prev_addr = cfg_entry->data;
+    uint32_t emu_writable = reg->emu_mask & ~reg->ro_mask & valid_mask;
+    uint32_t passthrough = ~reg->emu_mask & valid_mask;
+
+    cfg_entry->data = PT_MERGE_VALUE(*value, cfg_entry->data, emu_writable);
+    s->msi->addr_lo = cfg_entry->data;
+
+    *value = PT_MERGE_VALUE(*value, dev_value, passthrough);
+
+    if (cfg_entry->data != prev_addr && (s->msi->flags & PT_MSI_MAPPED)) {
+        pt_msi_update(s);
+    }
+
+    return 0;
+}
+/*
+ * Write hook for the Message Upper Address register.
+ *
+ * Same scheme as the lower half, but stored in msi->addr_hi.  Returns -1
+ * (after logging) when the device has no 64-bit MSI support, 0 otherwise.
+ */
+static int pt_msgaddr64_reg_write(XenPCIPassthroughState *s,
+                                  XenPTReg *cfg_entry, uint32_t *value,
+                                  uint32_t dev_value, uint32_t valid_mask)
+{
+    XenPTRegInfo *reg = cfg_entry->reg;
+    uint32_t prev_addr = cfg_entry->data;
+    uint32_t emu_writable;
+    uint32_t passthrough;
+
+    if (!(s->msi->flags & PCI_MSI_FLAGS_64BIT)) {
+        PT_LOG("Error: why comes to Upper Address without 64 bit support??\n");
+        return -1;
+    }
+
+    emu_writable = reg->emu_mask & ~reg->ro_mask & valid_mask;
+    cfg_entry->data = PT_MERGE_VALUE(*value, cfg_entry->data, emu_writable);
+    s->msi->addr_hi = cfg_entry->data;
+
+    passthrough = ~reg->emu_mask & valid_mask;
+    *value = PT_MERGE_VALUE(*value, dev_value, passthrough);
+
+    if (cfg_entry->data != prev_addr && (s->msi->flags & PT_MSI_MAPPED)) {
+        pt_msi_update(s);
+    }
+
+    return 0;
+}
+
+
+/* this function will be called twice (for 32 bit and 64 bit type) */
+/* write Message Data register */
+static int pt_msgdata_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry,
+                                uint16_t *value, uint16_t dev_value,
+                                uint16_t valid_mask)
+{
+    XenPTRegInfo *reg = cfg_entry->reg;
+    uint16_t writable_mask = 0;
+    uint16_t throughable_mask = 0;
+    uint16_t old_data = cfg_entry->data;
+    uint32_t flags = s->msi->flags;
+    uint32_t offset = reg->offset;
+
+    /* check the
offset whether matches the type or not */ + if (!((offset == PCI_MSI_DATA_64) && (flags & PCI_MSI_FLAGS_64BIT)) && + !((offset == PCI_MSI_DATA_32) && !(flags & PCI_MSI_FLAGS_64BIT))) { + /* exit I/O emulator */ + PT_LOG("Error: the offset is not match with the 32/64 bit type!!\n"); + return -1; + } + + /* modify emulate register */ + writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; + cfg_entry->data = PT_MERGE_VALUE(*value, cfg_entry->data, writable_mask); + /* update the msi_info too */ + s->msi->data = cfg_entry->data; + + /* create value for writing to I/O device register */ + throughable_mask = ~reg->emu_mask & valid_mask; + *value = PT_MERGE_VALUE(*value, dev_value, throughable_mask); + + /* update MSI */ + if (cfg_entry->data != old_data) { + if (flags & PT_MSI_MAPPED) { + pt_msi_update(s); + } + } + + return 0; +} + +/* MSI Capability Structure reg static infomation table */ +static XenPTRegInfo pt_emu_reg_msi_tbl[] = { + /* Next Pointer reg */ + { + .offset = PCI_CAP_LIST_NEXT, + .size = 1, + .init_val = 0x00, + .ro_mask = 0xFF, + .emu_mask = 0xFF, + .init = pt_ptr_reg_init, + .u.b.read = pt_byte_reg_read, + .u.b.write = pt_byte_reg_write, + .u.b.restore = NULL, + }, + /* Message Control reg */ + { + .offset = PCI_MSI_FLAGS, + .size = 2, + .init_val = 0x0000, + .ro_mask = 0xFF8E, + .emu_mask = 0x007F, + .init = pt_msgctrl_reg_init, + .u.w.read = pt_word_reg_read, + .u.w.write = pt_msgctrl_reg_write, + .u.w.restore = NULL, + }, + /* Message Address reg */ + { + .offset = PCI_MSI_ADDRESS_LO, + .size = 4, + .init_val = 0x00000000, + .ro_mask = 0x00000003, + .emu_mask = 0xFFFFFFFF, + .no_wb = 1, + .init = pt_common_reg_init, + .u.dw.read = pt_long_reg_read, + .u.dw.write = pt_msgaddr32_reg_write, + .u.dw.restore = NULL, + }, + /* Message Upper Address reg (if PCI_MSI_FLAGS_64BIT set) */ + { + .offset = PCI_MSI_ADDRESS_HI, + .size = 4, + .init_val = 0x00000000, + .ro_mask = 0x00000000, + .emu_mask = 0xFFFFFFFF, + .no_wb = 1, + .init = 
pt_msgaddr64_reg_init,
+        .u.dw.read  = pt_long_reg_read,
+        .u.dw.write = pt_msgaddr64_reg_write,
+        .u.dw.restore = NULL,
+    },
+    /* Message Data reg (16 bits of data for 32-bit devices) */
+    {
+        .offset     = PCI_MSI_DATA_32,
+        .size       = 2,
+        .init_val   = 0x0000,
+        .ro_mask    = 0x0000,
+        .emu_mask   = 0xFFFF,
+        .no_wb      = 1,
+        .init       = pt_msgdata_reg_init,
+        .u.w.read   = pt_word_reg_read,
+        .u.w.write  = pt_msgdata_reg_write,
+        .u.w.restore  = NULL,
+    },
+    /* Message Data reg (16 bits of data for 64-bit devices) */
+    {
+        .offset     = PCI_MSI_DATA_64,
+        .size       = 2,
+        .init_val   = 0x0000,
+        .ro_mask    = 0x0000,
+        .emu_mask   = 0xFFFF,
+        .no_wb      = 1,
+        .init       = pt_msgdata_reg_init,
+        .u.w.read   = pt_word_reg_read,
+        .u.w.write  = pt_msgdata_reg_write,
+        .u.w.restore  = NULL,
+    },
+    {
+        .size = 0,
+    },
+};
+
+
+/**************************************
+ * MSI-X Capability
+ */
+
+/*
+ * Init hook for the MSI-X Message Control register.
+ *
+ * If the emulated config space shows MSI-X already enabled, the enable bit
+ * is cleared on the physical device first.  The register offset is
+ * remembered in msix->ctrl_offset.  Returns reg->init_val.
+ */
+static uint32_t pt_msixctrl_reg_init(XenPCIPassthroughState *s,
+                                     XenPTRegInfo *reg, uint32_t real_offset)
+{
+    uint16_t msix_ctrl = pci_get_word(s->dev.config + real_offset);
+
+    if (msix_ctrl & PCI_MSIX_FLAGS_ENABLE) {
+        PT_LOG("MSIX enabled already, disable first\n");
+        host_pci_write_word(s->real_device, real_offset,
+                            msix_ctrl & ~PCI_MSIX_FLAGS_ENABLE);
+    }
+
+    s->msix->ctrl_offset = real_offset;
+
+    return reg->init_val;
+}
+/*
+ * Write hook for the MSI-X Message Control register.
+ *
+ * Updates the emulated register and the pass-through value, then, when the
+ * guest has MSI-X enabled and not globally masked, turns off MSI-INTx
+ * translation (if active) and calls pt_msix_update().  The enable state is
+ * mirrored into msix->enabled.  Always returns 0.
+ */
+static int pt_msixctrl_reg_write(XenPCIPassthroughState *s,
+                                 XenPTReg *cfg_entry, uint16_t *value,
+                                 uint16_t dev_value, uint16_t valid_mask)
+{
+    XenPTRegInfo *reg = cfg_entry->reg;
+    uint16_t emu_writable = reg->emu_mask & ~reg->ro_mask & valid_mask;
+    uint16_t passthrough = ~reg->emu_mask & valid_mask;
+
+    cfg_entry->data = PT_MERGE_VALUE(*value, cfg_entry->data, emu_writable);
+    *value = PT_MERGE_VALUE(*value, dev_value, passthrough);
+
+    if ((*value & PCI_MSIX_FLAGS_ENABLE)
+        && !(*value & PCI_MSIX_FLAGS_MASKALL)) {
+        if (s->msi_trans_en) {
+            PT_LOG("guest enabling MSI-X, disable MSI-INTx translation\n");
+            pt_disable_msi_translate(s);
+        }
+        pt_msix_update(s);
+    }
+
+    s->msix->enabled = !!(*value & PCI_MSIX_FLAGS_ENABLE);
+
+    return 0;
+}
+
+/* MSI-X Capability Structure: static register information table */
+static XenPTRegInfo pt_emu_reg_msix_tbl[] = {
+    /* Next Pointer reg */
+    {
+        .offset     = PCI_CAP_LIST_NEXT,
+        .size       = 1,
+        .init_val   = 0x00,
+        .ro_mask    = 0xFF,
+        .emu_mask   = 0xFF,
+        .init       = pt_ptr_reg_init,
+        .u.b.read   = pt_byte_reg_read,
+        .u.b.write  = pt_byte_reg_write,
+        .u.b.restore  = NULL,
+    },
+    /* Message Control reg */
+    {
+        .offset     = PCI_MSI_FLAGS,
+        .size       = 2,
+        .init_val   = 0x0000,
+        .ro_mask    = 0x3FFF,
+        .emu_mask   = 0x0000,
+        .init       = pt_msixctrl_reg_init,
+        .u.w.read   = pt_word_reg_read,
+        .u.w.write  = pt_msixctrl_reg_write,
+        .u.w.restore  = NULL,
+    },
+    /* end-of-table marker */
+    {
+        .size = 0,
+    },
+};
+
+
+/****************************
+ * Capabilities
+ */
+
+/* AER register operations */
+
+/*
+ * Copy one 32-bit AER register, located at aer_base + offset, from the
+ * physical device into the emulated config space.
+ */
+static void aer_save_one_register(XenPCIPassthroughState *s, int offset)
+{
+    uint32_t reg_pos = s->pm_state->aer_base + offset;
+    uint32_t hw_val = host_pci_read_long(s->real_device, reg_pos);
+
+    pci_set_long(s->dev.config + reg_pos, hw_val);
+}
+static void pt_aer_reg_save(XenPCIPassthroughState *s)
+{
+    /* after reset, following register values should be restored.
+     * So, save them.
+     */
+    aer_save_one_register(s, PCI_ERR_UNCOR_MASK);
+    aer_save_one_register(s, PCI_ERR_UNCOR_SEVER);
+    aer_save_one_register(s, PCI_ERR_COR_MASK);
+    aer_save_one_register(s, PCI_ERR_CAP);
+}
+/*
+ * Write one 32-bit AER register, located at aer_base + offset, from the
+ * emulated config space back to the physical device.
+ */
+static void aer_restore_one_register(XenPCIPassthroughState *s, int offset)
+{
+    uint32_t reg_pos = s->pm_state->aer_base + offset;
+    uint32_t saved = pci_get_long(s->dev.config + reg_pos);
+
+    host_pci_write_long(s->real_device, reg_pos, saved);
+}
+/*
+ * Re-program, after a reset, the AER registers that pt_aer_reg_save()
+ * captured.  Other AER registers are deliberately left alone.
+ */
+static void pt_aer_reg_restore(XenPCIPassthroughState *s)
+{
+    static const int aer_regs[] = {
+        PCI_ERR_UNCOR_MASK,
+        PCI_ERR_UNCOR_SEVER,
+        PCI_ERR_COR_MASK,
+        PCI_ERR_CAP,
+    };
+    unsigned int i;
+
+    for (i = 0; i < sizeof(aer_regs) / sizeof(aer_regs[0]); i++) {
+        aer_restore_one_register(s, aer_regs[i]);
+    }
+}
+
+/* capability structure register group size functions */
+
+/* Default size hook: the group size is the fixed value from the table. */
+static uint8_t pt_reg_grp_size_init(XenPCIPassthroughState *s,
+                                    const XenPTRegGroupInfo *grp_reg,
+                                    uint32_t base_offset)
+{
+    return grp_reg->grp_size;
+}
+/*
+ * Size hook for the Power Management capability.
+ *
+ * With power management enabled this also allocates s->pm_state, records
+ * the capability base offset, looks up the AER extended capability and, if
+ * one is found, saves its registers.  The size itself is the table value.
+ */
+static uint8_t pt_pm_size_init(XenPCIPassthroughState *s,
+                               const XenPTRegGroupInfo *grp_reg,
+                               uint32_t base_offset)
+{
+    if (s->power_mgmt) {
+        s->pm_state = g_malloc0(sizeof (XenPTPM));
+        s->pm_state->pm_base = base_offset;
+        s->pm_state->aer_base =
+            host_pci_find_ext_cap_offset(s->real_device, PCI_EXT_CAP_ID_ERR);
+
+        if (s->pm_state->aer_base) {
+            pt_aer_reg_save(s);
+        }
+    }
+
+    return grp_reg->grp_size;
+}
+/* get Vendor Specific Capability Structure register group size */
+static uint8_t pt_vendor_size_init(XenPCIPassthroughState *s,
+                                   const XenPTRegGroupInfo *grp_reg,
+
uint32_t base_offset) +{ + return pci_get_byte(s->dev.config + base_offset + 0x02); +} +/* get PCI Express Capability Structure register group size */ +static uint8_t pt_pcie_size_init(XenPCIPassthroughState *s, + const XenPTRegGroupInfo *grp_reg, + uint32_t base_offset) +{ + PCIDevice *d = &s->dev; + uint16_t exp_flag = 0; + uint16_t type = 0; + uint16_t version = 0; + uint8_t pcie_size = 0; + + exp_flag = pci_get_word(d->config + base_offset + PCI_EXP_FLAGS); + type = (exp_flag & PCI_EXP_FLAGS_TYPE) >> 4; + version = exp_flag & PCI_EXP_FLAGS_VERS; + + /* calculate size depend on capability version and device/port type */ + /* in case of PCI Express Base Specification Rev 1.x */ + if (version == 1) { + /* The PCI Express Capabilities, Device Capabilities, and Device + * Status/Control registers are required for all PCI Express devices. + * The Link Capabilities and Link Status/Control are required for all + * Endpoints that are not Root Complex Integrated Endpoints. Endpoints + * are not required to implement registers other than those listed + * above and terminate the capability structure. + */ + switch (type) { + case PCI_EXP_TYPE_ENDPOINT: + case PCI_EXP_TYPE_LEG_END: + pcie_size = 0x14; + break; + case PCI_EXP_TYPE_RC_END: + /* has no link */ + pcie_size = 0x0C; + break; + /* only EndPoint passthrough is supported */ + case PCI_EXP_TYPE_ROOT_PORT: + case PCI_EXP_TYPE_UPSTREAM: + case PCI_EXP_TYPE_DOWNSTREAM: + case PCI_EXP_TYPE_PCI_BRIDGE: + case PCI_EXP_TYPE_PCIE_BRIDGE: + case PCI_EXP_TYPE_RC_EC: + default: + hw_error("Internal error: Unsupported device/port type[%d]. " + "I/O emulator exit.\n", type); + } + } + /* in case of PCI Express Base Specification Rev 2.0 */ + else if (version == 2) { + switch (type) { + case PCI_EXP_TYPE_ENDPOINT: + case PCI_EXP_TYPE_LEG_END: + case PCI_EXP_TYPE_RC_END: + /* For Functions that do not implement the registers, + * these spaces must be hardwired to 0b. 
+             */
+            pcie_size = 0x3C;
+            break;
+        /* only EndPoint passthrough is supported */
+        case PCI_EXP_TYPE_ROOT_PORT:
+        case PCI_EXP_TYPE_UPSTREAM:
+        case PCI_EXP_TYPE_DOWNSTREAM:
+        case PCI_EXP_TYPE_PCI_BRIDGE:
+        case PCI_EXP_TYPE_PCIE_BRIDGE:
+        case PCI_EXP_TYPE_RC_EC:
+        default:
+            hw_error("Internal error: Unsupported device/port type[%d]. "
+                     "I/O emulator exit.\n", type);
+        }
+    } else {
+        hw_error("Internal error: Unsupported capability version[%d]. "
+                 "I/O emulator exit.\n", version);
+    }
+
+    return pcie_size;
+}
+/*
+ * Size hook for the MSI capability.
+ *
+ * The base structure is 0xa bytes; 4 more when the device is 64-bit address
+ * capable and 10 more when it supports per-vector masking.  As a side
+ * effect the per-device MSI state (s->msi) is allocated with pirq = -1.
+ */
+static uint8_t pt_msi_size_init(XenPCIPassthroughState *s,
+                                const XenPTRegGroupInfo *grp_reg,
+                                uint32_t base_offset)
+{
+    uint16_t msg_ctrl =
+        pci_get_word(s->dev.config + (base_offset + PCI_MSI_FLAGS));
+    uint8_t msi_size = 0xa;
+
+    msi_size += (msg_ctrl & PCI_MSI_FLAGS_64BIT) ? 4 : 0;
+    msi_size += (msg_ctrl & PCI_MSI_FLAGS_MASKBIT) ? 10 : 0;
+
+    s->msi = g_malloc0(sizeof (XenPTMSI));
+    s->msi->pirq = -1;
+    PT_LOG("done\n");
+
+    return msi_size;
+}
+/* get MSI-X Capability Structure register group size */
+static uint8_t pt_msix_size_init(XenPCIPassthroughState *s,
+                                 const XenPTRegGroupInfo *grp_reg,
+                                 uint32_t base_offset)
+{
+    int ret = 0;
+
+    ret = pt_msix_init(s, base_offset);
+
+    if (ret == -1) {
+        hw_error("Internal error: Invalid pt_msix_init return value[%d]. 
" + "I/O emulator exit.\n", ret); + } + + return grp_reg->grp_size; +} + + +static const XenPTRegGroupInfo pt_emu_reg_grp_tbl[] = { + /* Header Type0 reg group */ + { + .grp_id = 0xFF, + .grp_type = GRP_TYPE_EMU, + .grp_size = 0x40, + .size_init = pt_reg_grp_size_init, + .emu_reg_tbl= pt_emu_reg_header0_tbl, + }, + /* PCI PowerManagement Capability reg group */ + { + .grp_id = PCI_CAP_ID_PM, + .grp_type = GRP_TYPE_EMU, + .grp_size = PCI_PM_SIZEOF, + .size_init = pt_pm_size_init, + .emu_reg_tbl= pt_emu_reg_pm_tbl, + }, + /* AGP Capability Structure reg group */ + { + .grp_id = PCI_CAP_ID_AGP, + .grp_type = GRP_TYPE_HARDWIRED, + .grp_size = 0x30, + .size_init = pt_reg_grp_size_init, + }, + /* Vital Product Data Capability Structure reg group */ + { + .grp_id = PCI_CAP_ID_VPD, + .grp_type = GRP_TYPE_EMU, + .grp_size = 0x08, + .size_init = pt_reg_grp_size_init, + .emu_reg_tbl= pt_emu_reg_vpd_tbl, + }, + /* Slot Identification reg group */ + { + .grp_id = PCI_CAP_ID_SLOTID, + .grp_type = GRP_TYPE_HARDWIRED, + .grp_size = 0x04, + .size_init = pt_reg_grp_size_init, + }, + /* MSI Capability Structure reg group */ + { + .grp_id = PCI_CAP_ID_MSI, + .grp_type = GRP_TYPE_EMU, + .grp_size = 0xFF, + .size_init = pt_msi_size_init, + .emu_reg_tbl= pt_emu_reg_msi_tbl, + }, + /* PCI-X Capabilities List Item reg group */ + { + .grp_id = PCI_CAP_ID_PCIX, + .grp_type = GRP_TYPE_HARDWIRED, + .grp_size = 0x18, + .size_init = pt_reg_grp_size_init, + }, + /* Vendor Specific Capability Structure reg group */ + { + .grp_id = PCI_CAP_ID_VNDR, + .grp_type = GRP_TYPE_EMU, + .grp_size = 0xFF, + .size_init = pt_vendor_size_init, + .emu_reg_tbl= pt_emu_reg_vendor_tbl, + }, + /* SHPC Capability List Item reg group */ + { + .grp_id = PCI_CAP_ID_SHPC, + .grp_type = GRP_TYPE_HARDWIRED, + .grp_size = 0x08, + .size_init = pt_reg_grp_size_init, + }, + /* Subsystem ID and Subsystem Vendor ID Capability List Item reg group */ + { + .grp_id = PCI_CAP_ID_SSVID, + .grp_type = GRP_TYPE_HARDWIRED, + .grp_size = 
0x08,
+        .size_init  = pt_reg_grp_size_init,
+    },
+    /* AGP 8x Capability Structure reg group */
+    {
+        .grp_id     = PCI_CAP_ID_AGP3,
+        .grp_type   = GRP_TYPE_HARDWIRED,
+        .grp_size   = 0x30,
+        .size_init  = pt_reg_grp_size_init,
+    },
+    /* PCI Express Capability Structure reg group */
+    {
+        .grp_id     = PCI_CAP_ID_EXP,
+        .grp_type   = GRP_TYPE_EMU,
+        .grp_size   = 0xFF,
+        .size_init  = pt_pcie_size_init,
+        .emu_reg_tbl= pt_emu_reg_pcie_tbl,
+    },
+    /* MSI-X Capability Structure reg group */
+    {
+        .grp_id     = PCI_CAP_ID_MSIX,
+        .grp_type   = GRP_TYPE_EMU,
+        .grp_size   = 0x0C,
+        .size_init  = pt_msix_size_init,
+        .emu_reg_tbl= pt_emu_reg_msix_tbl,
+    },
+    {
+        .grp_size = 0,
+    },
+};
+
+/*
+ * Init hook for the Capabilities Pointer and every Next Pointer register.
+ *
+ * Starting from the pointer stored at real_offset, follow the capability
+ * chain in the emulated config space and return the offset of the first
+ * capability whose group is known, not hidden and of type GRP_TYPE_EMU.
+ * Hardwired groups and capabilities with no matching table entry are
+ * stepped over.  Returns 0 when the chain ends without a match.
+ */
+static uint32_t pt_ptr_reg_init(XenPCIPassthroughState *s,
+                                XenPTRegInfo *reg, uint32_t real_offset)
+{
+    uint32_t cap_pos = pci_get_byte(s->dev.config + real_offset);
+    const XenPTRegGroupInfo *grp;
+
+    while (cap_pos) {
+        for (grp = pt_emu_reg_grp_tbl; grp->grp_size != 0; grp++) {
+            if (pt_hide_dev_cap(s->real_device, grp->grp_id)) {
+                continue;
+            }
+            if (grp->grp_id != s->dev.config[cap_pos]) {
+                continue;
+            }
+            if (grp->grp_type == GRP_TYPE_EMU) {
+                return cap_pos;
+            }
+            /* hardwired-to-0 capability: skip it and look at the next */
+            break;
+        }
+        /* the byte after the capability ID is the next pointer */
+        cap_pos = pci_get_byte(s->dev.config + cap_pos + 1);
+    }
+
+    return cap_pos;
+}
+
+
+/*************
+ * Main
+ */
+
+/* restore a part of I/O device register */
+static void pt_config_restore(XenPCIPassthroughState *s)
+{
+    XenPTRegGroup *reg_grp_entry = NULL;
+    XenPTReg *reg_entry = NULL;
+    XenPTRegInfo *reg = NULL;
+    uint32_t real_offset = 0;
+    uint32_t read_val = 0;
+    uint32_t val = 0;
+    int ret = 0;
+
+    /* find emulate register group entry */
+    QLIST_FOREACH(reg_grp_entry, &s->reg_grp_tbl, entries) {
+
/* find emulate register entry */
+        QLIST_FOREACH(reg_entry, &reg_grp_entry->reg_tbl_list, entries) {
+            reg = reg_entry->reg;
+
+            /* check whether restoring is needed */
+            if (!reg->u.b.restore) {
+                continue;
+            }
+
+            real_offset = reg_grp_entry->base_offset + reg->offset;
+
+            /* read I/O device register value */
+            ret = host_pci_read_block(s->real_device, real_offset,
+                                      (uint8_t *)&read_val, reg->size);
+
+            if (!ret) {
+                PT_LOG("Error: pci_read_block failed. "
+                       "return value[%d].\n", ret);
+                memset(&read_val, 0xff, reg->size);
+            }
+
+            val = 0;
+
+            /* restore based on register size */
+            switch (reg->size) {
+            case 1:
+                /* byte register */
+                ret = reg->u.b.restore(s, reg_entry, real_offset,
+                                       (uint8_t)read_val, (uint8_t *)&val);
+                break;
+            case 2:
+                /* word register */
+                ret = reg->u.w.restore(s, reg_entry, real_offset,
+                                       (uint16_t)read_val, (uint16_t *)&val);
+                break;
+            case 4:
+                /* double word register */
+                ret = reg->u.dw.restore(s, reg_entry, real_offset,
+                                        (uint32_t)read_val, (uint32_t *)&val);
+                break;
+            }
+
+            /* restoring error */
+            if (ret < 0) {
+                hw_error("Internal error: Invalid restoring "
+                         "return value[%d]. I/O emulator exit.\n", ret);
+            }
+
+            PT_LOG_CONFIG("[%02x:%02x.%x]: address=%04x val=0x%08x len=%d\n",
+                          pci_bus_num(s->dev.bus), PCI_SLOT(s->dev.devfn),
+                          PCI_FUNC(s->dev.devfn),
+                          real_offset, val, reg->size);
+
+            ret = host_pci_write_block(s->real_device, real_offset,
+                                       (uint8_t *)&val, reg->size);
+
+            if (!ret) {
+                PT_LOG("Error: pci_write_block failed. "
+                       "return value[%d].\n", ret);
+            }
+        }
+    }
+
+    /* if AER supported, restore it */
+    if (s->pm_state->aer_base) {
+        pt_aer_reg_restore(s);
+    }
+}
+/*
+ * Re-run the init hook of every emulated register of every register group
+ * and store the result as the register's emulated value.  Registers
+ * without an init hook keep their current value.
+ */
+static void pt_config_reinit(XenPCIPassthroughState *s)
+{
+    XenPTRegGroup *reg_grp_entry = NULL;
+    XenPTReg *reg_entry = NULL;
+    XenPTRegInfo *reg = NULL;
+
+    QLIST_FOREACH(reg_grp_entry, &s->reg_grp_tbl, entries) {
+        QLIST_FOREACH(reg_entry, &reg_grp_entry->reg_tbl_list, entries) {
+            reg = reg_entry->reg;
+            if (reg->init) {
+                reg_entry->data =
+                    reg->init(s, reg_entry->reg,
+                              reg_grp_entry->base_offset + reg->offset);
+            }
+        }
+    }
+}
+
+/*
+ * Bring the device's config space back in shape after a power state
+ * transition that reset the device: restore the physical registers that
+ * have a restore hook, re-initialise all emulated registers, then try to
+ * re-enable MSI-INTx translation.  If that fails and a machine IRQ is
+ * known, the INTx binding is re-established instead.
+ *
+ * Returns the result of the last step attempted (negative on failure).
+ */
+static int pt_init_pci_config(XenPCIPassthroughState *s)
+{
+    PCIDevice *d = &s->dev;
+    int ret = 0;
+
+    PT_LOG("Reinitialize PCI configuration registers due to power state"
+           " transition with internal reset. [%02x:%02x.%x]\n",
+           pci_bus_num(d->bus), PCI_SLOT(d->devfn), PCI_FUNC(d->devfn));
+
+    /* restore a part of I/O device register */
+    pt_config_restore(s);
+
+    /* reinitialize all emulate register */
+    pt_config_reinit(s);
+
+    /* setup MSI-INTx translation if support */
+    ret = pt_enable_msi_translate(s);
+
+    /* rebind machine_irq to device */
+    if (ret < 0 && s->machine_irq != 0) {
+        uint8_t e_device = PCI_SLOT(s->dev.devfn);
+        uint8_t e_intx = pci_intx(s);
+
+        ret = xc_domain_bind_pt_pci_irq(xen_xc, xen_domid, s->machine_irq, 0,
+                                        e_device, e_intx);
+        if (ret < 0) {
+            PT_LOG("Error: Rebinding of interrupt failed! ret=%d\n", ret);
+        }
+    }
+
+    return ret;
+}
+
+/*
+ * Walk the capability list of the physical device and return the config
+ * space offset of capability `cap`, or 0 when the device has no capability
+ * list or the capability is not found.  The walk is bounded to 48 entries
+ * and stops on a pointer below 0x40 or on an ID of 0xff.
+ */
+static uint8_t find_cap_offset(XenPCIPassthroughState *s, uint8_t cap)
+{
+    int id;
+    int max_cap = 48;
+    int pos = PCI_CAPABILITY_LIST;
+    int status;
+
+    status = host_pci_read_byte(s->real_device, PCI_STATUS);
+    if ((status & PCI_STATUS_CAP_LIST) == 0) {
+        return 0;
+    }
+
+    while (max_cap--) {
+        pos = host_pci_read_byte(s->real_device, pos);
+        if (pos < 0x40) {
+            break;
+        }
+
+        /* clear the two low bits of the pointer before using it */
+        pos &= ~3;
+        id = host_pci_read_byte(s->real_device, pos + PCI_CAP_LIST_ID);
+
+        if (id == 0xff) {
+            break;
+        }
+        if (id == cap) {
+            return pos;
+        }
+
+        /* the next loop iteration reads the next pointer from here */
+        pos += PCI_CAP_LIST_NEXT;
+    }
+    return 0;
+}
+
+/*
+ * Create the emulated-register entry for `reg` and link it into `reg_grp`.
+ *
+ * If the register has an init hook, the hook supplies the initial emulated
+ * value; a result of PT_INVALID_REG means the register does not exist on
+ * this device, in which case the entry is released and nothing is linked.
+ */
+static void pt_config_reg_init(XenPCIPassthroughState *s,
+                               XenPTRegGroup *reg_grp, XenPTRegInfo *reg)
+{
+    /* g_malloc0() zeroes the entry, so data starts out as 0 */
+    XenPTReg *reg_entry = g_malloc0(sizeof (XenPTReg));
+
+    reg_entry->reg = reg;
+
+    if (reg->init) {
+        uint32_t data = reg->init(s, reg_entry->reg,
+                                  reg_grp->base_offset + reg->offset);
+
+        if (data == PT_INVALID_REG) {
+            /* allocated with g_malloc0(), so it must go through g_free() */
+            g_free(reg_entry);
+            return;
+        }
+        reg_entry->data = data;
+    }
+
+    QLIST_INSERT_HEAD(&reg_grp->reg_tbl_list, reg_entry, entries);
+}
+
+void pt_config_init(XenPCIPassthroughState *s)
+{
+    XenPTRegGroup *reg_grp_entry = NULL;
+    uint32_t reg_grp_offset = 0;
+    XenPTRegInfo *reg_tbl = NULL;
+    int i, j;
+
+    QLIST_INIT(&s->reg_grp_tbl);
+
+    for (i = 0; pt_emu_reg_grp_tbl[i].grp_size != 0; i++) {
+        if (pt_emu_reg_grp_tbl[i].grp_id != 0xFF) {
+            if (pt_hide_dev_cap(s->real_device,
+                                pt_emu_reg_grp_tbl[i].grp_id)) {
+                continue;
+            }
+
+            reg_grp_offset = find_cap_offset(s, pt_emu_reg_grp_tbl[i].grp_id);
+
+            if (!reg_grp_offset) {
+                continue;
+            }
+        }
+
+        reg_grp_entry = g_malloc0(sizeof (XenPTRegGroup));
+        QLIST_INIT(&reg_grp_entry->reg_tbl_list);
+        QLIST_INSERT_HEAD(&s->reg_grp_tbl, reg_grp_entry, entries);
+
+        reg_grp_entry->base_offset = reg_grp_offset;
+ reg_grp_entry->reg_grp = pt_emu_reg_grp_tbl + i; + if (pt_emu_reg_grp_tbl[i].size_init) { + /* get register group size */ + reg_grp_entry->size = + pt_emu_reg_grp_tbl[i].size_init(s, reg_grp_entry->reg_grp, + reg_grp_offset); + } + + if (pt_emu_reg_grp_tbl[i].grp_type == GRP_TYPE_EMU) { + if (pt_emu_reg_grp_tbl[i].emu_reg_tbl) { + reg_tbl = pt_emu_reg_grp_tbl[i].emu_reg_tbl; + /* initialize capability register */ + for (j = 0; reg_tbl->size != 0; j++, reg_tbl++) { + /* initialize capability register */ + pt_config_reg_init(s, reg_grp_entry, reg_tbl); + } + } + } + reg_grp_offset = 0; + } + + return; +}